<a href="https://colab.research.google.com/github/DevaYadhala-04/Machine-Learning-Project/blob/main/WeatherPrediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import re
import missingno as mso
from scipy import stats
from scipy.stats import ttest_ind
from scipy.stats import pearsonr
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Read the weather CSV into a DataFrame. The bare expressions below only
# render output when executed as notebook cells.
DATASET_PATH = "seattle-weather.csv"
data = pd.read_csv(DATASET_PATH)
data.head()

data.shape

import warnings

# Silence library warnings for the rest of the session so cell output stays
# readable.
warnings.filterwarnings('ignore')

# Bar chart of how many rows fall in each weather class.
# `x` is passed by keyword: seaborn 0.12+ accepts only `data` positionally,
# so countplot("weather", data=data) raises a TypeError there.
sns.countplot(x="weather", data=data, palette="hls")

# Share of each weather class, as a percentage of all rows.
# A single value_counts() pass replaces five separate boolean filters.
weather_counts = data["weather"].value_counts()
total_rows = len(data["weather"])

# The per-class counts keep their original names; a class that does not occur
# counts as 0, matching len() of an empty selection.
countrain = int(weather_counts.get("rain", 0))
countsun = int(weather_counts.get("sun", 0))
countdrizzle = int(weather_counts.get("drizzle", 0))
countsnow = int(weather_counts.get("snow", 0))
countfog = int(weather_counts.get("fog", 0))

for class_label, class_count in (
    ("Rain", countrain),
    ("Sun", countsun),
    ("Drizzle", countdrizzle),
    ("Snow", countsnow),
    ("Fog", countfog),
):
    # "{:.2f}" rounds to two decimals. The previous "{:2f}" only set a minimum
    # field width of 2 and therefore printed six decimal places.
    print("Percent of {}:{:.2f}%".format(class_label, class_count / total_rows * 100))

# Summary statistics for the four numeric measurements (shown when run as a
# notebook cell).
numeric_cols = ["precipitation", "temp_max", "temp_min", "wind"]
data[numeric_cols].describe()

# 2x2 grid of histograms with a KDE overlay, one panel per measurement.
# axs.flat walks the grid row by row: [0,0], [0,1], [1,0], [1,1].
sns.set(style="darkgrid")
fig, axs = plt.subplots(2, 2, figsize=(10, 8))
hist_colors = ["green", "red", "skyblue", "orange"]
for column, panel, shade in zip(numeric_cols, axs.flat, hist_colors):
    sns.histplot(data=data, x=column, kde=True, ax=panel, color=shade)

# 2x2 grid of violin plots, one panel per numeric measurement.
sns.set(style="darkgrid")
fig, axs = plt.subplots(2, 2, figsize=(10, 8))

violin_specs = (
    ("precipitation", "green"),
    ("temp_max", "red"),
    ("temp_min", "skyblue"),
    ("wind", "yellow"),
)
# axs.flat walks the grid row by row: [0,0], [0,1], [1,0], [1,1].
for (column, shade), panel in zip(violin_specs, axs.flat):
    # `kde=True` was dropped: it is a histplot option, not a violinplot one.
    # A violin is already a kernel density estimate, and the stray keyword is
    # either ignored or forwarded to matplotlib, where it is rejected.
    sns.violinplot(data=data, x=column, ax=panel, color=shade)

# One horizontal box plot per numeric measurement, grouped by weather class.
# Each gets its own 12x6 figure, in the same order and with the same palettes
# as before.
boxplot_specs = (
    ("precipitation", "YlOrBr"),
    ("temp_max", "inferno"),
    ("wind", "inferno"),
    ("temp_min", "YlOrBr"),
)
for column, palette_name in boxplot_specs:
    plt.figure(figsize=(12, 6))
    # x/y are passed by keyword: seaborn 0.12+ accepts only `data`
    # positionally, so boxplot("col", "weather", data=data) raises a TypeError.
    sns.boxplot(x=column, y="weather", data=data, palette=palette_name)

# Annotated heatmap of pairwise correlations between the numeric columns.
plt.figure(figsize=(12, 7))
# Restrict to numeric columns explicitly: `data` still holds string columns
# ("date" is dropped by name later, "weather" is label-encoded later), and
# pandas 2.x raises on DataFrame.corr() for non-numeric data instead of
# silently skipping it.
numeric_data = data.select_dtypes(include="number")
sns.heatmap(numeric_data.corr(), annot=True, cmap='coolwarm')

# For each feature paired with temp_max: draw a scatter plot, then report the
# Pearson correlation and an independent two-sample t-test between the two
# columns.
for feature in ("precipitation", "wind"):
    data.plot(x=feature, y="temp_max", style='o')
    feature_values = data[feature]
    temp_max_values = data["temp_max"]
    print("Pearson correlation:", feature_values.corr(temp_max_values))
    print("T Test and P value:", ttest_ind(feature_values, temp_max_values))

# Scatter of daily maximum against daily minimum temperature.
data.plot(x="temp_max", y="temp_min", style='o')
# Per-column count of missing values (shown when run as a notebook cell).
data.isna().sum()

# missingno bar chart of non-null counts for every column except "date",
# drawn on the right-hand half of a 12x6 figure.
missing_fig = plt.figure(figsize=(12, 6))
axz = missing_fig.add_subplot(1, 2, 2)
mso.bar(data.drop(columns=["date"]), ax=axz, fontsize=12)

# Modelling frame: everything except the date column.
df = data.drop(columns=["date"])

# Tukey fences (1.5 * IQR beyond the quartiles) are computed on numeric
# columns only. `df` still contains the string "weather" column, and
# pandas 2.x raises when asked for quantiles or ordered comparisons on it.
numeric_part = df.select_dtypes(include="number")
Q1 = numeric_part.quantile(0.25)
Q3 = numeric_part.quantile(0.75)
IQR = Q3 - Q1

below_fence = numeric_part < (Q1 - 1.5 * IQR)
above_fence = numeric_part > (Q3 + 1.5 * IQR)
# A row is dropped if ANY of its numeric values falls outside the fences.
outlier_rows = (below_fence | above_fence).any(axis=1)

# .copy() makes the result an independent frame, so later column assignments
# do not trigger SettingWithCopyWarning.
df = df[~outlier_rows].copy()

# Same 2x2 histogram grid as for the raw data, now drawn from the filtered
# frame `df`.
sns.set(style="darkgrid")
fig, axs = plt.subplots(2, 2, figsize=(10, 8))
filtered_hist_specs = (
    ("precipitation", 'green'),
    ("temp_max", 'red'),
    ("temp_min", 'skyblue'),
    ("wind", 'orange'),
)
# axs.flat walks the grid row by row: [0,0], [0,1], [1,0], [1,1].
for (column, shade), panel in zip(filtered_hist_specs, axs.flat):
    sns.histplot(data=df, x=column, kde=True, ax=panel, color=shade)

# First rows of the filtered frame (shown when run as a notebook cell).
df.head()

# Work on an explicit copy so the column assignment below writes to `df`
# itself rather than to a filtered slice (avoids SettingWithCopyWarning).
df = df.copy()

# Replace the string weather labels with integer class ids. `lc` keeps the
# id -> label mapping in `lc.classes_` for decoding predictions later.
lc = LabelEncoder()
df["weather"] = lc.fit_transform(df["weather"])

df.head()

# Feature matrix: every column except the target, kept as floats.
# The previous `.astype(int)` truncated the continuous measurements
# (e.g. 8.9 -> 8, 0.3 -> 0), discarding information the models could use and
# mismatching the fractional values passed at prediction time. The trailing
# `[:, 0:]` slice was a no-op and is gone.
x = df.drop(columns=["weather"]).to_numpy(dtype=float)
y = df["weather"].values

df.weather.unique()

# Hold out 10% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=2)

# Three baseline classifiers. Each is fitted on the training split and its
# test-set accuracy is printed as a percentage.
knn = KNeighborsClassifier()
svm = SVC()
gbc = GradientBoostingClassifier(
    subsample=0.5,
    n_estimators=450,
    max_depth=5,
    max_leaf_nodes=25,
)

# Fitting order is kept as KNN -> SVM -> gradient boosting.
for estimator, report_template in (
    (knn, "KNN Accuracy:{:.2f}%"),
    (svm, "SVM Accuracy:{:.2f}%"),
    (gbc, "Gradient Boosting Accuracy:{:.2f}%"),
):
    estimator.fit(x_train, y_train)
    test_accuracy = estimator.score(x_test, y_test)
    print(report_template.format(test_accuracy * 100))

import warnings

# Keep library warnings out of the training output.
warnings.filterwarnings("ignore")

# Fit an XGBoost classifier with default settings and report its test-set
# accuracy as a percentage.
xgb = XGBClassifier()
xgb.fit(x_train, y_train)
xgb_test_accuracy = xgb.score(x_test, y_test)
print("XGB Accuracy:{:.2f}%".format(xgb_test_accuracy * 100))

# Classify one hand-written observation with the trained XGBoost model.
# Values must follow the training feature order -- presumably precipitation,
# temp_max, temp_min, wind; confirm against the CSV column order.
# The variable is named `sample` rather than `input` so the builtin input()
# is not shadowed.
sample = np.asarray([[1.140175, 8.9, 2.8, 2.469818]])
ot = xgb.predict(sample)
print("The weather is:")

# Decode the class id with the fitted LabelEncoder instead of a hard-coded
# 0..4 -> name chain. The encoder numbers only the classes actually present
# after outlier removal, so a fixed table silently mislabels predictions if
# any class was filtered out entirely.
predicted_label = lc.inverse_transform(np.asarray(ot).astype(int))[0]
# Capitalised to match the previous output style ("Rain", "Sun", ...).
print(str(predicted_label).capitalize())
