In [101]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

In [102]:
# Load your data into a pandas dataframe
df=pd.read_excel('rain.xlsx')

In [103]:
df.head(3)

Unnamed: 0,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,WindDir3pm,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
0,,W,44.0,W,WNW,20.0,24.0,71,22,1007.7,1007.1,8.0,,16.9,21.8,No,No
1,,WNW,44.0,NNW,WSW,4.0,22.0,44,25,1010.6,1007.8,,,17.2,24.3,No,No
2,,WSW,46.0,W,WSW,19.0,26.0,38,30,1007.6,1008.7,,2.0,21.0,23.2,No,No


In [104]:
df.tail(3)

Unnamed: 0,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,WindDir3pm,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
146,,NNW,63.0,NW,WNW,26.0,31.0,77,62,1004.0,1003.3,8.0,8.0,9.5,11.6,Yes,Yes
147,,NW,26.0,N,NNW,9.0,13.0,82,79,1013.8,1013.5,8.0,8.0,7.6,9.4,Yes,Yes
148,,WNW,35.0,W,W,15.0,20.0,83,48,1018.0,1018.3,8.0,1.0,10.1,14.0,Yes,No


In [105]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 149 entries, 0 to 148
Data columns (total 17 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Sunshine       0 non-null      float64
 1   WindGustDir    146 non-null    object 
 2   WindGustSpeed  146 non-null    float64
 3   WindDir9am     128 non-null    object 
 4   WindDir3pm     145 non-null    object 
 5   WindSpeed9am   148 non-null    float64
 6   WindSpeed3pm   148 non-null    float64
 7   Humidity9am    149 non-null    int64  
 8   Humidity3pm    149 non-null    int64  
 9   Pressure9am    149 non-null    float64
 10  Pressure3pm    149 non-null    float64
 11  Cloud9am       32 non-null     float64
 12  Cloud3pm       36 non-null     float64
 13  Temp9am        149 non-null    float64
 14  Temp3pm        149 non-null    float64
 15  RainToday      148 non-null    object 
 16  RainTomorrow   148 non-null    object 
dtypes: float64(10), int64(2), object(5)
memory usage: 19.9

In [106]:
df.describe()

Unnamed: 0,Sunshine,WindGustSpeed,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm
count,0.0,146.0,148.0,148.0,149.0,149.0,149.0,149.0,32.0,36.0,149.0,149.0
mean,,39.945205,9.635135,16.310811,54.456376,29.100671,1014.358389,1011.426846,6.15625,5.027778,19.075168,27.730201
std,,14.695897,7.300311,7.922114,14.22858,18.443392,5.902921,5.708631,2.566652,3.047117,4.911128,6.550528
min,,17.0,0.0,0.0,20.0,8.0,994.3,993.0,0.0,1.0,7.6,9.4
25%,,30.0,4.0,11.0,44.0,18.0,1009.9,1007.6,5.0,1.0,15.9,23.2
50%,,37.0,7.0,15.0,53.0,25.0,1014.2,1011.6,8.0,6.0,18.1,28.2
75%,,48.0,15.0,20.5,62.0,33.0,1018.0,1015.0,8.0,8.0,21.6,31.2
max,,98.0,30.0,48.0,92.0,93.0,1027.1,1023.5,8.0,8.0,34.5,43.4


In [107]:
df.isnull().sum()

Sunshine         149
WindGustDir        3
WindGustSpeed      3
WindDir9am        21
WindDir3pm         4
WindSpeed9am       1
WindSpeed3pm       1
Humidity9am        0
Humidity3pm        0
Pressure9am        0
Pressure3pm        0
Cloud9am         117
Cloud3pm         113
Temp9am            0
Temp3pm            0
RainToday          1
RainTomorrow       1
dtype: int64

In [108]:
# Encode categorical variables to numeric
df['WindGustDir'] = df['WindGustDir'].astype('category').cat.codes
df['WindDir9am'] = df['WindDir9am'].astype('category').cat.codes
df['WindDir3pm'] = df['WindDir3pm'].astype('category').cat.codes
df['RainToday'] = df['RainToday'].map({'No': 0, 'Yes': 1})
df['RainTomorrow'] = df['RainTomorrow'].map({'No': 0, 'Yes': 1})

In [109]:
# Handle missing values (for simplicity, filling with mean or mode here)
df['Sunshine'].fillna(df['Sunshine'].mean(), inplace=True)
df['WindGustDir'].fillna(df['WindGustDir'].mean(), inplace=True)
df.fillna(method='ffill', inplace=True)  # Forward fill other missing values

In [110]:
df['Cloud3pm'].fillna(0, inplace=True)

In [111]:
df.isnull().sum()

Sunshine         149
WindGustDir        0
WindGustSpeed      0
WindDir9am         0
WindDir3pm         0
WindSpeed9am       0
WindSpeed3pm       0
Humidity9am        0
Humidity3pm        0
Pressure9am        0
Pressure3pm        0
Cloud9am           0
Cloud3pm           0
Temp9am            0
Temp3pm            0
RainToday          0
RainTomorrow       0
dtype: int64

In [112]:
df.head()

Unnamed: 0,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,WindDir3pm,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
0,,13,44.0,13,14,20.0,24.0,71,22,1007.7,1007.1,8.0,0.0,16.9,21.8,0.0,0.0
1,,14,44.0,6,15,4.0,22.0,44,25,1010.6,1007.8,8.0,0.0,17.2,24.3,0.0,0.0
2,,15,46.0,13,15,19.0,26.0,38,30,1007.6,1008.7,8.0,2.0,21.0,23.2,0.0,0.0
3,,4,24.0,9,0,11.0,9.0,45,16,1017.6,1012.8,8.0,2.0,18.1,26.5,0.0,0.0
4,,13,41.0,1,7,7.0,20.0,82,33,1010.8,1006.0,7.0,8.0,17.8,29.7,0.0,0.0


In [113]:
df = df.drop('Sunshine',axis = 1)

In [114]:
# Define features (X) and target (Y)
X = df.drop(['RainTomorrow'], axis=1)
y = df['RainTomorrow']

In [131]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [134]:
# Train a Random Forest Classifier
rf_model = RandomForestClassifier(n_estimators = 100, random_state = 42 )
rf_model.fit(X_train,y_train)
#make prediction
y_pred = rf_model.predict(X_test)

In [135]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 90.00%


In [137]:
will_rain_all = rf_model.predict(X_test)

# Iterate through the predictions and print results for each day
for i, prediction in enumerate(will_rain_all):
    if prediction == 1:
        print(f"Prediction for row {i}: It will rain tomorrow.")
    else:
        print(f"Prediction for row {i}: It will be sunny tomorrow.")

Prediction for row 0: It will rain tomorrow.
Prediction for row 1: It will be sunny tomorrow.
Prediction for row 2: It will be sunny tomorrow.
Prediction for row 3: It will be sunny tomorrow.
Prediction for row 4: It will be sunny tomorrow.
Prediction for row 5: It will be sunny tomorrow.
Prediction for row 6: It will be sunny tomorrow.
Prediction for row 7: It will be sunny tomorrow.
Prediction for row 8: It will be sunny tomorrow.
Prediction for row 9: It will be sunny tomorrow.
Prediction for row 10: It will be sunny tomorrow.
Prediction for row 11: It will rain tomorrow.
Prediction for row 12: It will be sunny tomorrow.
Prediction for row 13: It will be sunny tomorrow.
Prediction for row 14: It will be sunny tomorrow.
Prediction for row 15: It will be sunny tomorrow.
Prediction for row 16: It will be sunny tomorrow.
Prediction for row 17: It will be sunny tomorrow.
Prediction for row 18: It will be sunny tomorrow.
Prediction for row 19: It will be sunny tomorrow.
Prediction for row