# Weather Prediction Model

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Show every column when a frame is displayed, then load the dataset
# and preview its first rows.
pd.set_option('display.max_columns', None)
data = pd.read_csv("weather.csv")
data.head()

In [None]:
# Summary statistics of the loaded frame, as reported by DataFrame.describe().
data.describe()

In [None]:
# Column dtypes and non-null counts; useful for spotting columns with missing values.
data.info()

In [None]:
# (rows, columns) of the loaded frame.
data.shape

## Visualising the data

In [None]:
# Distribution of the target variable.
# sns.displot is a figure-level function: it always creates its own figure, so
# a preceding plt.figure(figsize=...) only leaves an empty extra figure behind.
# The 15x5 inch size is therefore requested through height/aspect instead
# (width = height * aspect).
grid = sns.displot(x='MaxTemp', data=data, height=5, aspect=3)
grid.set_xlabels('Max Temp')
plt.show()

In [None]:
# Scatter of MinTemp against MaxTemp, with each point coloured by its MaxTemp value.
fig, ax = plt.subplots(figsize=(6, 5))
sc = ax.scatter(
    x='MinTemp',
    y='MaxTemp',
    c='MaxTemp',
    cmap='viridis',
    marker='o',
    data=data,
)
fig.colorbar(sc, ax=ax, label='Max Temp')
ax.set_xlabel('MinTemp')
ax.set_ylabel('Max Temp')
plt.show()

In [None]:
# 2-D density of (MinTemp, MaxTemp) observations on a 20-cell hexagonal grid.
plt.figure(figsize=(6, 5))
hb = plt.hexbin(x='MinTemp', y='MaxTemp', cmap='Blues', data=data, gridsize=20)
# No `C` values are passed, so the colour of each hexagon encodes the number of
# observations falling in it, not a temperature; label the colorbar accordingly.
plt.colorbar(hb, label='Count')
plt.xlabel('MinTemp')
plt.ylabel('Max Temp')
plt.show()

## Encoding the data

In [None]:
# Turn the Yes/No rain indicators into 1/0 flags.
yes_no_codes = {'No': 0, 'Yes': 1}
for rain_col in ('RainToday', 'RainTomorrow'):
    data[rain_col] = data[rain_col].replace(yes_no_codes)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Replace each wind-direction column with integer labels.
# The same encoder object is re-fitted for every column, so after the loop
# `le` holds only the classes of the last column processed.
le = LabelEncoder()
for wind_col in ('WindGustDir', 'WindDir9am', 'WindDir3pm'):
    data[wind_col] = le.fit_transform(data[wind_col])

In [None]:
# Preview the first rows after the encoding steps above.
data.head()

## Cleaning the data 

In [None]:
# Filling missing values
# Each listed column has its missing entries replaced by that column's median.
# The filled Series is assigned back to the frame rather than calling
# fillna(inplace=True) on data[col]: that chained in-place form operates on a
# temporary object under pandas Copy-on-Write and can leave `data` unchanged.
for column in ('Sunshine', 'WindGustSpeed', 'WindSpeed9am'):
    data[column] = data[column].fillna(data[column].median())

In [None]:
# Re-inspect dtypes and non-null counts after the fill step above.
data.info()

## Splitting the data

In [None]:
# Features are every column except the target; the target is MaxTemp.
X = data.drop(columns=['MaxTemp'])
y = data['MaxTemp']

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Training the model

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the training split.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

## Evaluating the model

In [None]:
# Score the fitted model on the held-out split (the estimator's default score metric).
model.score(X=X_test, y=y_test)

In [None]:
# Predict on the held-out features and report the mean squared error.
prediction = model.predict(X_test)
mse = mean_squared_error(y_test, prediction)
print(f"Mean Squared Error : {mse}")

In [None]:
# Actual vs Predicted Values
# Put the held-out targets and the model's predictions side by side.
comparison = {
    'Actual': y_test.to_numpy().flatten(),
    'Predicted': prediction.flatten(),
}
df = pd.DataFrame(comparison)
print(df)