# Load Data

In [2]:
import pandas as pd

# Read the cleaned launch/weather table from disk and preview its first rows.
DATA_FILE = "Cleaned_spacex_weather.csv"
df = pd.read_csv(DATA_FILE)
df.head(n=5)

Unnamed: 0,lat,lon,temperature_2m,relative_humidity_2m,weathercode,wind_speed_10m,rocket,success,payloads,year,month,day,hour
0,9.047721,167.743129,26.4,77,2,33.9,0,0.0,28,2006,3,24,22
1,9.047721,167.743129,27.4,80,2,32.7,0,0.0,28,2007,3,21,1
2,9.047721,167.743129,27.2,79,1,22.0,0,0.0,56,2008,8,3,3
3,9.047721,167.743129,26.9,83,1,20.3,0,1.0,28,2008,9,28,23
4,9.047721,167.743129,27.3,86,3,28.5,0,1.0,28,2009,7,13,3


# Feature Selection

In [4]:
# Separate the prediction target (`success`) from the feature matrix
# (every remaining column of `df`).
y = df["success"]
X = df.drop(columns=["success"])

In [5]:
# Display the feature matrix (all columns of `df` except `success`).
X

Unnamed: 0,lat,lon,temperature_2m,relative_humidity_2m,weathercode,wind_speed_10m,rocket,payloads,year,month,day,hour
0,9.047721,167.743129,26.4,77,2,33.9,0,28,2006,3,24,22
1,9.047721,167.743129,27.4,80,2,32.7,0,28,2007,3,21,1
2,9.047721,167.743129,27.2,79,1,22.0,0,56,2008,8,3,3
3,9.047721,167.743129,26.9,83,1,20.3,0,28,2008,9,28,23
4,9.047721,167.743129,27.3,86,3,28.5,0,28,2009,7,13,3
...,...,...,...,...,...,...,...,...,...,...,...,...
200,28.561857,-80.577366,21.5,88,2,9.9,1,2,2022,12,1,0
201,34.632093,-120.610829,21.5,88,2,9.9,1,28,2022,12,1,0
202,28.561857,-80.577366,21.5,88,2,9.9,1,28,2022,12,1,0
203,28.608058,-80.603956,21.5,88,2,9.9,2,2,2022,12,1,0


In [6]:
# Display the target series (the `success` column of `df`).
y

0      0.0
1      0.0
2      0.0
3      1.0
4      1.0
      ... 
200    0.0
201    0.0
202    0.0
203    0.0
204    0.0
Name: success, Length: 205, dtype: float64

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing. Stratifying on `y` keeps the class
# ratio the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

In [8]:
# NOTE(review): MinMaxScaler is not used in the visible cells; the import is
# kept in case a later cell relies on it.
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Fit the scaler on the training split only, then apply the same transform to
# the test split, so no test-set statistics leak into training.
scaler = StandardScaler()

# The scaler returns bare NumPy arrays, so re-attach both the column names and
# the original row index. Without `index=` the frames get a fresh 0..n-1 index
# that no longer lines up with the index of y_train / y_test.
X_train_scaled = pd.DataFrame(
    scaler.fit_transform(X_train), columns=X_train.columns, index=X_train.index
)
X_test_scaled = pd.DataFrame(
    scaler.transform(X_test), columns=X_test.columns, index=X_test.index
)


# Model Training

In [10]:
from sklearn.linear_model import LogisticRegression

# Baseline classifier; class_weight="balanced" is used because the two
# classes are far from equally represented in the target.
lr = LogisticRegression(class_weight="balanced")

# Train on the standardised training split. As the cell's last expression,
# the fitted estimator is also what the cell displays.
lr.fit(X=X_train_scaled, y=y_train)

In [11]:
# Predict on the standardised training split and show the true labels next to
# the predictions. The frame takes its row index from `y_train`.
y_train_pred = lr.predict(X_train_scaled)
pd.DataFrame({"Original": y_train, "Predicted": y_train_pred})

Unnamed: 0,Oringinal,Predicted
123,1.0,1.0
148,1.0,1.0
90,1.0,1.0
137,1.0,1.0
26,1.0,1.0
...,...,...
121,1.0,1.0
105,1.0,1.0
129,1.0,0.0
31,1.0,0.0


In [12]:
from sklearn.metrics import accuracy_score

# Fraction of training rows whose predicted label equals the true label.
accuracy_score(y_true=y_train, y_pred=y_train_pred)

0.9090909090909091

In [13]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 on the training split.
train_report = classification_report(y_true=y_train, y_pred=y_train_pred)
print(train_report)

              precision    recall  f1-score   support

         0.0       0.58      0.82      0.68        17
         1.0       0.97      0.92      0.95       126

    accuracy                           0.91       143
   macro avg       0.78      0.87      0.81       143
weighted avg       0.93      0.91      0.92       143



In [14]:
from sklearn.metrics import confusion_matrix

# Confusion matrix for the training split (rows: true class, columns: predicted class).
train_confusion = confusion_matrix(y_true=y_train, y_pred=y_train_pred)
print(train_confusion)

[[ 14   3]
 [ 10 116]]


In [15]:
# Predict on the standardised held-out split and show the true labels next to
# the predictions. The frame takes its row index from `y_test`.
y_test_pred = lr.predict(X_test_scaled)
pd.DataFrame({"Original": y_test, "Predicted": y_test_pred})

Unnamed: 0,Oringinal,Predicted
135,1.0,0.0
165,1.0,1.0
138,1.0,1.0
184,1.0,1.0
49,1.0,1.0
...,...,...
58,1.0,1.0
55,1.0,1.0
16,1.0,0.0
168,1.0,1.0


In [16]:
from sklearn.metrics import accuracy_score

# Fraction of held-out rows whose predicted label equals the true label.
accuracy_score(y_true=y_test, y_pred=y_test_pred)

0.7741935483870968

In [17]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 on the held-out split.
test_report = classification_report(y_true=y_test, y_pred=y_test_pred)
print(test_report)

              precision    recall  f1-score   support

         0.0       0.29      0.71      0.42         7
         1.0       0.96      0.78      0.86        55

    accuracy                           0.77        62
   macro avg       0.62      0.75      0.64        62
weighted avg       0.88      0.77      0.81        62



In [18]:
from sklearn.metrics import confusion_matrix

# Confusion matrix for the held-out split (rows: true class, columns: predicted class).
test_confusion = confusion_matrix(y_true=y_test, y_pred=y_test_pred)
print(test_confusion)

[[ 5  2]
 [12 43]]


In [19]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Candidate hyperparameters: 2 * 4 * 2 * 2 * 1 = 32 combinations.
param_grid = dict(
    n_estimators=[100, 200],  # number of trees
    max_depth=[3, 5, 10, None],
    min_samples_split=[2, 5],
    min_samples_leaf=[1, 2],
    class_weight=['balanced'],
)

# Base estimator; the fixed seed keeps the forest reproducible.
rf = RandomForestClassifier(random_state=42)

# Exhaustive 5-fold search, scored by macro-averaged F1 so that both classes
# carry equal weight in model selection.
grid_search = GridSearchCV(
    rf,
    param_grid,
    scoring='f1_macro',
    cv=5,
    n_jobs=-1,
    verbose=1,
)

# The search is fit on the unscaled training split.
grid_search.fit(X_train, y_train)

# Report the winning parameter combination.
print("🔧 Best Parameters Found:", grid_search.best_params_, sep="\n")

# Score the refit best estimator on the held-out split. `confusion_matrix` and
# `classification_report` come from the earlier metric cells.
y_pred = grid_search.predict(X_test)

print("\n📊 Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\n📋 Classification Report:", classification_report(y_test, y_pred), sep="\n")


Fitting 5 folds for each of 32 candidates, totalling 160 fits
🔧 Best Parameters Found:
{'class_weight': 'balanced', 'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}

📊 Confusion Matrix:
[[ 5  2]
 [ 1 54]]

📋 Classification Report:
              precision    recall  f1-score   support

         0.0       0.83      0.71      0.77         7
         1.0       0.96      0.98      0.97        55

    accuracy                           0.95        62
   macro avg       0.90      0.85      0.87        62
weighted avg       0.95      0.95      0.95        62



In [20]:
import pickle

# Persist the best estimator found by the grid search so it can be reused
# without retraining.
best_model = grid_search.best_estimator_

with open("model.pkl", mode="wb") as model_file:
    pickle.dump(best_model, model_file)

print("Model successfully saved as model.pkl")


✅ Model successfully saved as model.pkl


In [21]:
import pickle

# Reload the persisted estimator. Unpickling can execute arbitrary code, so
# only load pickle files that come from a trusted source.
with open("model.pkl", mode="rb") as model_file:
    model = pickle.load(model_file)


In [55]:
import numpy as np

# Collect one launch scenario from the user. Each answer is converted
# immediately, so a non-numeric answer raises ValueError at that prompt.
# NOTE(review): the "(e.g. ...)" hints for the rocket and payload codes look
# narrower than the training data displayed earlier (payload values 2/28/56,
# rocket code 2) — confirm the valid ranges before relying on the hints.
lat = float(input("🌍 Enter latitude: "))
lon = float(input("🌍 Enter longitude: "))
temperature_2m = float(input("🌡️ Enter temperature (°C): "))
relative_humidity_2m = float(input("💧 Enter relative humidity (%): "))
weathercode = int(input("🌦️ Enter weather code (e.g. 0-3): "))
wind_speed_10m = float(input("💨 Enter wind speed (m/s): "))
rocket = int(input("🚀 Enter rocket code (e.g. 0 or 1): "))
payloads = int(input("📦 Enter payload code (e.g. 0 or 1): "))
year = int(input("📅 Enter year (e.g. 2020): "))
month = int(input("📅 Enter month (1-12): "))
day = int(input("📅 Enter day (1-31): "))
hour = int(input("🕒 Enter hour (0-23): "))

# Key every value by its feature name so it cannot be attached to the wrong
# column, whatever column order the model was trained with.
features = {
    "lat": lat,
    "lon": lon,
    "temperature_2m": temperature_2m,
    "relative_humidity_2m": relative_humidity_2m,
    "weathercode": weathercode,
    "wind_speed_10m": wind_speed_10m,
    "rocket": rocket,
    "payloads": payloads,
    "year": year,
    "month": month,
    "day": day,
    "hour": hour,
}

# Take the column order from the loaded model itself (scikit-learn records it
# in `feature_names_in_` when an estimator is fit on a DataFrame) instead of
# from `X_train`, so this cell also works in a session that only loaded
# model.pkl. Fall back to the order above if the attribute is absent.
cols = list(getattr(model, "feature_names_in_", features))

# Selecting by name raises KeyError if the model expects a feature that was
# not collected above, rather than silently predicting on misplaced values.
input_data = pd.DataFrame([features])[cols]

# Run the prediction. The probability column is looked up through
# `model.classes_` rather than by casting the label to an int, so it stays
# correct even if the class labels are not exactly 0 and 1.
prediction = model.predict(input_data)[0]
class_probabilities = model.predict_proba(input_data)[0]
proba = class_probabilities[np.flatnonzero(model.classes_ == prediction)[0]]

# Print the result.
print(f"🚀 Prediction: {'Success' if prediction == 1 else 'Failure'}")
print(f"📊 Confidence: {round(proba * 100, 2)}%")



🌍 Enter latitude:  34.63
🌍 Enter longitude:  -80.57
🌡️ Enter temperature (°C):  14.4
💧 Enter relative humidity (%):  88
🌦️ Enter weather code (e.g. 0-3):  2
💨 Enter wind speed (m/s):  18.3
🚀 Enter rocket code (e.g. 0 or 1):  1
📦 Enter payload code (e.g. 0 or 1):  28
📅 Enter year (e.g. 2020):  2025
📅 Enter month (1-12):  6
📅 Enter day (1-31):  1
🕒 Enter hour (0-23):  17


🚀 Prediction: Success
📊 Confidence: 78.33%
