## Importing libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error


### Loading dataset

In [38]:
# Read the weather observations from CSV into a pandas DataFrame
weather_csv = "weather.csv"
data = pd.read_csv(weather_csv)
data


Unnamed: 0,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,WindDir3pm,WindSpeed9am,...,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RISK_MM,RainTomorrow
0,8.0,24.3,0.0,3.4,6.3,NW,30.0,SW,NW,6.0,...,29,1019.7,1015.0,7,7,14.4,23.6,No,3.6,Yes
1,14.0,26.9,3.6,4.4,9.7,ENE,39.0,E,W,4.0,...,36,1012.4,1008.4,5,3,17.5,25.7,Yes,3.6,Yes
2,13.7,23.4,3.6,5.8,3.3,NW,85.0,N,NNE,6.0,...,69,1009.5,1007.2,8,7,15.4,20.2,Yes,39.8,Yes
3,13.3,15.5,39.8,7.2,9.1,NW,54.0,WNW,W,30.0,...,56,1005.5,1007.0,2,7,13.5,14.1,Yes,2.8,Yes
4,7.6,16.1,2.8,5.6,10.6,SSE,50.0,SSE,ESE,20.0,...,49,1018.3,1018.5,7,7,11.1,15.4,Yes,0.0,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
361,9.0,30.7,0.0,7.6,12.1,NNW,76.0,SSE,NW,7.0,...,15,1016.1,1010.8,1,3,20.4,30.0,No,0.0,No
362,7.1,28.4,0.0,11.6,12.7,N,48.0,NNW,NNW,2.0,...,22,1020.0,1016.9,0,1,17.2,28.2,No,0.0,No
363,12.5,19.9,0.0,8.4,5.3,ESE,43.0,ENE,ENE,11.0,...,47,1024.0,1022.8,3,2,14.5,18.3,No,0.0,No
364,12.5,26.9,0.0,5.0,7.1,NW,46.0,SSW,WNW,6.0,...,39,1021.0,1016.2,6,7,15.8,25.9,No,0.0,No


### Preprocessing data

In [39]:
# Preprocess the data: drop the text-valued columns and encode the Yes/No flags
# (missing-value handling and normalization happen in a later cell)

# Remove columns with strings (the wind-direction columns hold compass codes
# such as 'NW'). errors="ignore" keeps this cell re-runnable after the columns
# have already been dropped from `data`.
columns_to_drop = ['WindGustDir', 'WindDir9am', 'WindDir3pm']
data = data.drop(columns=columns_to_drop, errors="ignore")

# Replace 'Yes' and 'No' with 1 and 0 respectively.
# Only columns that are still non-numeric are mapped: re-mapping an
# already-encoded column would match neither key and silently turn every
# value into NaN.
yes_no_to_int = {'Yes': 1, 'No': 0}
for flag_column in ('RainToday', 'RainTomorrow'):
    if not pd.api.types.is_numeric_dtype(data[flag_column]):
        data[flag_column] = data[flag_column].map(yes_no_to_int)
data

Unnamed: 0,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustSpeed,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RISK_MM,RainTomorrow
0,8.0,24.3,0.0,3.4,6.3,30.0,6.0,20,68,29,1019.7,1015.0,7,7,14.4,23.6,0,3.6,1
1,14.0,26.9,3.6,4.4,9.7,39.0,4.0,17,80,36,1012.4,1008.4,5,3,17.5,25.7,1,3.6,1
2,13.7,23.4,3.6,5.8,3.3,85.0,6.0,6,82,69,1009.5,1007.2,8,7,15.4,20.2,1,39.8,1
3,13.3,15.5,39.8,7.2,9.1,54.0,30.0,24,62,56,1005.5,1007.0,2,7,13.5,14.1,1,2.8,1
4,7.6,16.1,2.8,5.6,10.6,50.0,20.0,28,68,49,1018.3,1018.5,7,7,11.1,15.4,1,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
361,9.0,30.7,0.0,7.6,12.1,76.0,7.0,50,38,15,1016.1,1010.8,1,3,20.4,30.0,0,0.0,0
362,7.1,28.4,0.0,11.6,12.7,48.0,2.0,19,45,22,1020.0,1016.9,0,1,17.2,28.2,0,0.0,0
363,12.5,19.9,0.0,8.4,5.3,43.0,11.0,9,63,47,1024.0,1022.8,3,2,14.5,18.3,0,0.0,0
364,12.5,26.9,0.0,5.0,7.1,46.0,6.0,28,69,39,1021.0,1016.2,6,7,15.8,25.9,0,0.0,0


In [40]:
# Confirm the (rows, columns) dimensions after the preprocessing cell above
data.shape

(366, 19)

In [42]:
# Fill in the missing values and normalize the features
# NOTE(review): every column is standardized here, including the Temp3pm
# column that is later used as the regression target, and the statistics are
# computed on the full dataset before the train/test split. Consider fitting
# the scaling on the training rows only -- TODO confirm intended behavior.

# Replace each missing entry with the mean of its column.
data = data.fillna(data.mean())

# Standardize each column to zero mean and unit standard deviation.
# A constant column has a standard deviation of zero; dividing by it would
# produce NaN values, so such columns are divided by 1 and become all zeros.
column_std = data.std()
column_std[column_std == 0] = 1.0
data = (data - data.mean()) / column_std

data.shape

(366, 19)

### Splitting data

In [43]:
# Split the data into a training set (80%) and a test set (20%).
# Temp3pm is the regression target; every other column is used as a feature.
features = data.drop(columns="Temp3pm")
target = data["Temp3pm"]

# A fixed random_state makes the shuffle reproducible, so the fitted model and
# the reported error are the same on every Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)


### Model training

In [44]:
# Fit an ordinary least-squares linear regression on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
model = LinearRegression().fit(X_train, y_train)
model


LinearRegression()

### Testing

In [46]:

# Predict the target for the held-out test rows with the fitted model,
# then display the resulting array of predictions
y_pred = model.predict(X=X_test)
y_pred

array([-1.22381838,  0.79173967,  1.05278181,  0.52320752, -1.22469112,
       -0.85828541, -1.34970154, -0.04519269, -1.63143733, -0.02645615,
       -0.70933353,  1.70363544, -1.1953248 ,  1.08965603, -0.83312085,
        0.77066358, -1.05219002, -0.54634219, -0.14905866, -0.23394718,
       -0.53624357, -0.09505662, -0.53009395,  0.15841678, -1.0473443 ,
        0.98151808,  0.11772699,  0.98840844, -1.05739201, -1.10867102,
       -0.10143417,  0.12653798,  0.55135838, -0.88556072,  1.13599229,
        1.20013617,  0.57764769,  0.08629739, -1.29914273,  1.23412599,
       -1.32199523,  0.52374722, -0.34343563,  0.08789429, -0.38540838,
       -0.8570712 ,  0.13798534, -0.02089395, -0.51341569, -0.40749092,
        0.94374433,  1.94807596,  0.63524076, -0.07164827,  1.13486659,
       -1.13785938,  1.96751644, -0.90408253,  1.22198133, -0.25263304,
        0.21570803,  1.35875049, -1.64902967,  1.07704734,  0.63117954,
        0.58691709, -0.70509933, -0.26464624,  0.49913159,  0.39

### Performance evaluation

In [47]:
# Score the predictions against the true test targets with mean squared error.
# The target column was standardized earlier in the notebook, so this error is
# expressed in standardized units rather than in degrees.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("Mean Squared Error: ", mse)


Mean Squared Error:  0.013572632048026341


_Thank you!_