In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [None]:
    np.random.seed(42)

    time = np.linspace(0, 10, 1000)
    flux = 1 + 0.05 * np.sin(2 * np.pi * 1.5 * time)  # Simulating stellar variability

    num_periods = 1000 // 3
    periods = np.repeat([2, 4, 6], num_periods)
    periods = np.tile(periods, 1000 // num_periods)

    num_durations = 1000 // 3
    durations = np.repeat([1, 1.5, 2], num_durations)
    durations = np.tile(durations, 1000 // num_durations)

    num_depths = 1000 // 3
    depths = np.repeat([0.02, 0.03, 0.01], num_depths)
    depths = np.tile(depths, 1000 // num_depths)

    data = pd.DataFrame({
        'Time': time,
        'Flux': flux,
        'Period': periods,
        'Transit_Duration': durations,
        'Transit_Depth': depths,
        'Label': np.concatenate([np.zeros(1000), np.ones(1000), np.zeros(1000 * 2)])

    })


In [None]:
# Function to generate synthetic dataset with multiple exoplanets
def generate_synthetic_dataset(num_samples=1000):
    np.random.seed(42)

    time = np.linspace(0, 10, num_samples)
    flux = 1 + 0.05 * np.sin(2 * np.pi * 1.5 * time)  # Simulating stellar variability

    num_periods = num_samples // 3
    periods = np.repeat([2, 4, 6], num_periods)
    periods = np.tile(periods, num_samples // num_periods)

    num_durations = num_samples // 3
    durations = np.repeat([1, 1.5, 2], num_durations)
    durations = np.tile(durations, num_samples // num_durations)

    num_depths = num_samples // 3
    depths = np.repeat([0.02, 0.03, 0.01], num_depths)
    depths = np.tile(depths, num_samples // num_depths)

    data = pd.DataFrame({
        'Time': time,
        'Flux': flux,
        'Period': periods,
        'Transit_Duration': durations,
        'Transit_Depth': depths,
        'Label': np.concatenate([np.zeros(num_samples), np.ones(num_samples), np.zeros(num_samples * 2)])

    })

    return data
    np.random.seed(42)

    time = np.linspace(0, 10, num_samples)
    flux = 1 + 0.05 * np.sin(2 * np.pi * 1.5 * time)  # Simulating stellar variability

    num_periods = num_samples // 3
    periods = np.repeat([2, 4, 6], num_periods)
    periods = np.tile(periods, num_samples // num_periods)

    num_durations = num_samples // 3
    durations = np.repeat([1, 1.5, 2], num_durations)
    durations = np.tile(durations, num_samples // num_durations)

    num_depths = num_samples // 3
    depths = np.repeat([0.02, 0.03, 0.01], num_depths)
    depths = np.tile(depths, num_samples // num_depths)

    data = pd.DataFrame({
        'Time': time,
        'Flux': flux,
        'Period': periods,
        'Transit_Duration': durations,
        'Transit_Depth': depths,
        'Label': np.concatenate([np.zeros(num_samples), np.ones(num_samples), np.zeros(num_samples * 2)])
    })

    return data

In [None]:
# Function to visualize light curves
def visualize_light_curves(data):
    plt.figure(figsize=(12, 6))
    for label, group in data.groupby('Label'):
        plt.plot(group['Time'], group['Flux'], label=f'Label {int(label)}', alpha=0.7)
    plt.title('Synthetic Light Curves with Exoplanetary Transits')
    plt.xlabel('Time')
    plt.ylabel('Flux')
    plt.legend()
    plt.show()


In [None]:
# Function to visualize feature importance
def visualize_feature_importance(model, feature_names):
    importance = model.feature_importances_
    sorted_idx = np.argsort(importance)[::-1]

    plt.bar(range(len(importance)), importance[sorted_idx])
    plt.xticks(range(len(importance)), [feature_names[i] for i in sorted_idx], rotation=45)
    plt.title('Feature Importance')
    plt.show()


In [None]:
# Function to run the exoplanet detection project
def run_exoplanet_detection():
    # Generate synthetic dataset
    data = generate_synthetic_dataset()

    # Visualize light curves
    visualize_light_curves(data)

    # Data preprocessing
    X = data[['Time', 'Flux', 'Period', 'Transit_Duration', 'Transit_Depth']]
    y = data.loc[X.index, 'Label']  # Select labels corresponding to the selected features

    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train a Random Forest classifier
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Visualize feature importance
    visualize_feature_importance(model, X.columns)

    # User input for prediction
    user_input_time = float(input("Enter a time value for prediction: "))
    user_input_flux = float(input("Enter a flux value for prediction: "))
    user_input_period = float(input("Enter the orbital period for prediction: "))
    user_input_duration = float(input("Enter the transit duration for prediction: "))
    user_input_depth = float(input("Enter the transit depth for prediction: "))

    # Make a prediction based on user input
    user_prediction = model.predict([[user_input_time, user_input_flux, user_input_period,
                                     user_input_duration, user_input_depth]])
    print(f"The model predicts: {'Exoplanet Transit Detected' if user_prediction[0] == 1 else 'No Exoplanet Transit Detected'}")

    # Make predictions on the test set
    y_pred = model.predict(X_test)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)

    # Display evaluation metrics
    print(f"\nModel Evaluation Metrics:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("Confusion Matrix:")
    print(conf_matrix)


In [None]:
data = pd.DataFrame([[3, 45, 60, 56, 78,0]], columns=['Time', 'Flux', 'Period', 'Transit_Duration', 'Transit_Depth','Label'])
visualize_light_curves(data)


In [3]:
df = pd.read_csv("TD_2023.11.22_08.37.40.csv")

In [4]:
df.head()

Unnamed: 0,pl_name,pl_orbper,pl_orbpererr1,pl_orbpererr2,pl_orbperlim,pl_rade,pl_radeerr1,pl_radeerr2,pl_radelim,pl_orbeccen,...,pl_ratror,pl_ratrorerr1,pl_ratrorerr2,pl_ratrorlim,sy_vmag,sy_vmagerr1,sy_vmagerr2,sy_kmag,sy_kmagerr1,sy_kmagerr2
0,55 Cnc e,2.808,0.002,-0.002,0.0,,,,,0.174,...,,,,,5.95084,0.023,-0.023,4.015,0.036,-0.036
1,55 Cnc e,0.736547,1e-06,-1e-06,0.0,1.875,0.029,-0.029,0.0,0.05,...,0.0182,0.0002,-0.0002,0.0,5.95084,0.023,-0.023,4.015,0.036,-0.036
2,55 Cnc e,0.736548,2e-06,-1e-06,0.0,,,,,0.028,...,,,,,5.95084,0.023,-0.023,4.015,0.036,-0.036
3,55 Cnc e,0.736539,7e-06,-7e-06,0.0,1.91,0.08,-0.08,0.0,,...,0.0187,0.0007,-0.0007,0.0,5.95084,0.023,-0.023,4.015,0.036,-0.036
4,55 Cnc e,2.79674,0.0001,-0.0001,0.0,,,,,0.264,...,,,,,5.95084,0.023,-0.023,4.015,0.036,-0.036


In [5]:
print(df.size)
df.shape

1536672


(32014, 48)

In [None]:
df.fillna(df.mean(), inplace=True)

In [7]:
print(df_cleaned.size)
df_cleaned.shape

NameError: name 'df_cleaned' is not defined

In [8]:
df1 = df.dropna()

In [21]:
df_cleaned.info()

NameError: name 'df_cleaned' is not defined

In [22]:
column_names = df_cleaned.columns
print(column_names)

NameError: name 'df_cleaned' is not defined

In [23]:
df_cleaned.head()

NameError: name 'df_cleaned' is not defined

In [24]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [13]:
# Select relevant features and target variable
features = df1[['pl_orbper', 'pl_rade', 'pl_orbeccen', 'pl_orbincl', 'pl_tranmid', 'pl_imppar', 'pl_trandep', 'pl_trandur', 'pl_ratdor', 'pl_ratror', 'sy_vmag', 'sy_kmag']]
target = df1['ttv_flag']


In [14]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

In [15]:
# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [16]:
# Train a RandomForestClassifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train_scaled, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test_scaled)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 1.00
Precision: 0.00
Recall: 0.00
F1 Score: 0.00


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


In [17]:
# Sample input data
sample_input = pd.DataFrame({
    'pl_orbper': [10.0],
    'pl_rade': [1.5],
    'pl_orbeccen': [0.1],
    'pl_orbincl': [89.0],
    'pl_tranmid': [2459000.0],
    'pl_imppar': [0.02],
    'pl_trandep': [0.01],
    'pl_trandur': [2.0],
    'pl_ratdor': [0.01],
    'pl_ratror': [0.1],
    'sy_vmag': [10.0],
    'sy_kmag': [8.0]
})

# Standardize the sample input
sample_input_scaled = scaler.transform(sample_input)

# Make predictions using the trained model
sample_output = model.predict(sample_input_scaled)

print("Sample Input:")
print(sample_input)
print("\nPredicted Output:")
print(sample_output)

Sample Input:
   pl_orbper  pl_rade  pl_orbeccen  pl_orbincl  pl_tranmid  pl_imppar  \
0       10.0      1.5          0.1        89.0   2459000.0       0.02   

   pl_trandep  pl_trandur  pl_ratdor  pl_ratror  sy_vmag  sy_kmag  
0        0.01         2.0       0.01        0.1     10.0      8.0  

Predicted Output:
[0]


In [18]:
y_train.nunique()

2

In [None]:
# Sample input data for class 1 (ttv_flag = 1)
sample_input_class_1 = pd.DataFrame({
    'pl_orbper': [15.0],
    'pl_rade': [2.0],
    'pl_orbeccen': [0.2],
    'pl_orbincl': [88.0],
    'pl_tranmid': [2459100.0],
    'pl_imppar': [0.03],
    'pl_trandep': [0.02],
    'pl_trandur': [3.0],
    'pl_ratdor': [0.015],
    'pl_ratror': [0.15],
    'sy_vmag': [9.5],
    'sy_kmag': [7.8]
})

# Standardize the sample input
sample_input_class_1_scaled = scaler.transform(sample_input_class_1)

# Make predictions using the trained model
sample_output_class_1 = model.predict(sample_input_class_1_scaled)

print("Sample Input for Class 1:")
print(sample_input_class_1)
print("\nPredicted Output for Class 1:")
print(sample_output_class_1)


In [19]:
# Sample input data for class 1 (ttv_flag = 1)
sample_input_class_1_2 = pd.DataFrame({
    'pl_orbper': [20.0],
    'pl_rade': [1.8],
    'pl_orbeccen': [0.15],
    'pl_orbincl': [87.5],
    'pl_tranmid': [2459200.0],
    'pl_imppar': [0.025],
    'pl_trandep': [0.015],
    'pl_trandur': [2.5],
    'pl_ratdor': [0.012],
    'pl_ratror': [0.12],
    'sy_vmag': [9.8],
    'sy_kmag': [8.2]
})

# Standardize the sample input
sample_input_class_1_2_scaled = scaler.transform(sample_input_class_1_2)

# Make predictions using the trained model
sample_output_class_1_2 = model.predict(sample_input_class_1_2_scaled)

print("Sample Input for Class 1:")
print(sample_input_class_1_2)
print("\nPredicted Output for Class 1:")
print(sample_output_class_1_2)


Sample Input for Class 1:
   pl_orbper  pl_rade  pl_orbeccen  pl_orbincl  pl_tranmid  pl_imppar  \
0       20.0      1.8         0.15        87.5   2459200.0      0.025   

   pl_trandep  pl_trandur  pl_ratdor  pl_ratror  sy_vmag  sy_kmag  
0       0.015         2.5      0.012       0.12      9.8      8.2  

Predicted Output for Class 1:
[0]


In [20]:
# Select only rows where ttv_flag is 1
class_1_data = df1[df1['ttv_flag'] == 1]

# Display the dataset for class 1
print("Dataset for Class 1:")
print(class_1_data)


Dataset for Class 1:
           pl_name  pl_orbper  pl_orbpererr1  pl_orbpererr2  pl_orbperlim  \
2247      K2-266 d  14.697000   3.400000e-04  -3.500000e-04           0.0   
2250      K2-266 e  19.482000   1.200000e-03  -1.200000e-03           0.0   
4985   Kepler-11 b  10.303900   6.000000e-04  -1.000000e-03           0.0   
5000   Kepler-11 c  13.024100   1.300000e-03  -8.000000e-04           0.0   
5011   Kepler-11 d  22.684500   9.000000e-04  -9.000000e-04           0.0   
5027   Kepler-11 e  31.999600   8.000000e-04  -1.200000e-03           0.0   
5043   Kepler-11 f  46.688800   2.700000e-03  -3.200000e-03           0.0   
31250    WASP-18 b   0.941452   2.400000e-07  -2.400000e-07           0.0   
31256    WASP-18 b   0.941453   7.800000e-07  -8.600000e-07           0.0   

       pl_rade  pl_radeerr1  pl_radeerr2  pl_radelim  pl_orbeccen  ...  \
2247     2.930        0.140       -0.120         0.0      0.04700  ...   
2250     2.730        0.140       -0.110         0.0      0.