In [1]:
# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Step 1: Load dataset
df = pd.read_csv("E:\\Datasets\\seattle-weather.csv")
df

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,01/01/2012,0.0,12.8,5.0,4.7,drizzle
1,02/01/2012,10.9,10.6,2.8,4.5,rain
2,03/01/2012,0.8,11.7,7.2,2.3,rain
3,04/01/2012,20.3,12.2,5.6,4.7,rain
4,05/01/2012,1.3,8.9,2.8,6.1,rain
...,...,...,...,...,...,...
1456,27/12/2015,8.6,4.4,1.7,2.9,rain
1457,28/12/2015,1.5,5.0,1.7,1.3,rain
1458,29/12/2015,0.0,7.2,0.6,2.6,fog
1459,30/12/2015,0.0,5.6,-1.0,3.4,sun


In [2]:
# Step 2: Drop the 'date' column (not useful for prediction)
df = df.drop(columns=["date"])
df

Unnamed: 0,precipitation,temp_max,temp_min,wind,weather
0,0.0,12.8,5.0,4.7,drizzle
1,10.9,10.6,2.8,4.5,rain
2,0.8,11.7,7.2,2.3,rain
3,20.3,12.2,5.6,4.7,rain
4,1.3,8.9,2.8,6.1,rain
...,...,...,...,...,...
1456,8.6,4.4,1.7,2.9,rain
1457,1.5,5.0,1.7,1.3,rain
1458,0.0,7.2,0.6,2.6,fog
1459,0.0,5.6,-1.0,3.4,sun


In [3]:
# Step 3: Convert target variable 'weather' to binary (1 = rain, 0 = not rain)
df["weather"] = df["weather"].apply(lambda x: 1 if x.strip().lower() == "rain" else 0)
df["weather"]

0       0
1       1
2       1
3       1
4       1
       ..
1456    1
1457    1
1458    0
1459    0
1460    0
Name: weather, Length: 1461, dtype: int64

In [4]:
# Step 4: Define input (X) and output (y)
X = df.drop(columns=["weather"])
y = df["weather"]

In [5]:
X

Unnamed: 0,precipitation,temp_max,temp_min,wind
0,0.0,12.8,5.0,4.7
1,10.9,10.6,2.8,4.5
2,0.8,11.7,7.2,2.3
3,20.3,12.2,5.6,4.7
4,1.3,8.9,2.8,6.1
...,...,...,...,...
1456,8.6,4.4,1.7,2.9
1457,1.5,5.0,1.7,1.3
1458,0.0,7.2,0.6,2.6
1459,0.0,5.6,-1.0,3.4


In [6]:
y

0       0
1       1
2       1
3       1
4       1
       ..
1456    1
1457    1
1458    0
1459    0
1460    0
Name: weather, Length: 1461, dtype: int64

In [7]:
# Step 5: Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
#stratify=y → “Target variable ke class proportion ko barabar rakho jab train-test split karo.”
# Step 6: Feature scaling (important for numeric data)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 7: Train Logistic Regression model
model = LogisticRegression()
model.fit(X_train_scaled, y_train)

# Step 8: Make predictions
y_pred = model.predict(X_test_scaled)

# Step 9: Evaluate model
print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print("\n📊 Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\n📋 Classification Report:\n", classification_report(y_test, y_pred))

✅ Accuracy: 0.8532423208191127

📊 Confusion Matrix:
 [[151  13]
 [ 30  99]]

📋 Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.92      0.88       164
           1       0.88      0.77      0.82       129

    accuracy                           0.85       293
   macro avg       0.86      0.84      0.85       293
weighted avg       0.86      0.85      0.85       293



In [8]:
# Now test on completely new unseen data
new_data = pd.DataFrame({
    'precipitation': [0, 12.5, 4.3, 0, 8.0],
    'temp_max': [10.0, 9.2, 11.0, 8.3, 12.1],
    'temp_min': [2.0, 3.5, 5.1, 0.5, 6.2],
    'wind': [2.2, 4.0, 3.1, 1.8, 5.5]
})

#Predict using trained model
predictions = model.predict(new_data)

#Show prediction results
new_data['Predicted_weather'] = predictions
new_data['Predicted_weather'] = new_data['Predicted_weather'].map({0: 'sun', 1: 'rain'})

print("\n🌤️ Predictions on New Unseen Data:")
print(new_data)


🌤️ Predictions on New Unseen Data:
   precipitation  temp_max  temp_min  wind Predicted_weather
0            0.0      10.0       2.0   2.2               sun
1           12.5       9.2       3.5   4.0              rain
2            4.3      11.0       5.1   3.1               sun
3            0.0       8.3       0.5   1.8               sun
4            8.0      12.1       6.2   5.5              rain




In [9]:
# Step 7 (replace Logistic Regression with SVM)
from sklearn.svm import SVC

# Train SVM model
svm_model = SVC(kernel='rbf', C=1.0, gamma='scale')  # RBF kernel is default
svm_model.fit(X_train_scaled, y_train)

# Step 8: Make predictions
y_pred_svm = svm_model.predict(X_test_scaled)

# Step 9: Evaluate SVM model
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

print("✅ SVM Accuracy:", accuracy_score(y_test, y_pred_svm))
print("\n📊 SVM Confusion Matrix:\n", confusion_matrix(y_test, y_pred_svm))
print("\n📋 SVM Classification Report:\n", classification_report(y_test, y_pred_svm))


✅ SVM Accuracy: 0.8668941979522184

📊 SVM Confusion Matrix:
 [[158   6]
 [ 33  96]]

📋 SVM Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.96      0.89       164
           1       0.94      0.74      0.83       129

    accuracy                           0.87       293
   macro avg       0.88      0.85      0.86       293
weighted avg       0.88      0.87      0.86       293

