In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE  # <- NOTE: from imblearn, not sklearn

# Step 1: Load your data
df = pd.read_csv("D:/ML/Logistic/weather_forecast_data_Classification_Regression.csv")

# Step 2: Encode the target (Rain: 'Yes'/'No') to numeric (0 and 1)
encoder = OrdinalEncoder()
df['Rain'] = encoder.fit_transform(df[['Rain']])  # target column
print("Target categories:", encoder.categories_)

# Step 3: Split into features (X) and target (y)
X = df.drop("Rain", axis=1)
y = df["Rain"]

# Step 4: Train-test split
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Apply SMOTE to training data
smote = SMOTE(random_state=42)
x_train_smote, y_train_smote = smote.fit_resample(x_train, y_train)

# Step 6: Standardize the features (important for logistic regression)
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train_smote)
x_test_scaled = scaler.transform(x_test)

# Step 7: Train the logistic regression model
model = LogisticRegression()
model.fit(x_train_scaled, y_train_smote)

# Step 8: Predict and evaluate
y_pred = model.predict(x_test_scaled)
print("Classification Report:\n", classification_report(y_test, y_pred, target_names=["No Rain", "Rain"]))


Target categories: [array(['no rain', 'rain'], dtype=object)]
Classification Report:
               precision    recall  f1-score   support

     No Rain       0.99      0.89      0.94       443
        Rain       0.53      0.91      0.67        57

    accuracy                           0.90       500
   macro avg       0.76      0.90      0.80       500
weighted avg       0.93      0.90      0.91       500

