# Plots For Data

### 0. Pre Processing For Delhi.csv

In [None]:
from matplotlib import pyplot as plt
import matplotlib.dates as mdates
import csv
import datetime as dt

# Default size for every figure created after this point (width 50, height 10).
plt.rcParams["figure.figsize"] = (50, 10)

col_names = {}  # column index -> column name
columns = {}  # column name -> list of that column's values, in file order

# newline="" hands newline handling to the csv module, as its documentation
# asks for, so quoted fields with embedded line breaks are parsed correctly.
with open("./data/Delhi.csv", newline="") as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0  # stays 0 if the file is empty

    for line_count, row in enumerate(csv_reader, start=1):
        if line_count == 1:
            # Header row: register every column and give it an empty list.
            print(f'Column names are {", ".join(row)}')
            for i, s in enumerate(row):
                col_names[i] = s
                columns[s] = []
        else:
            # Data row: the first field is kept as a string (parsed as a date
            # below); every other field is converted to float.
            for i, s in enumerate(row):
                columns[col_names[i]].append(s if i == 0 else float(s))
    print(f'Processed {line_count} lines.')

# Turn the ISO-formatted date strings into datetime.date objects for plotting.
columns["date"] = [dt.datetime.strptime(d, '%Y-%m-%d').date() for d in columns["date"]]

### 1. Date vs Avg Temp

In [None]:
# Average temperature against date; one tick every 100 days, labelled dd/mm/yyyy.
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.DayLocator(interval=100))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d/%m/%Y'))
ax.plot(columns["date"], columns["Avg Temp (c)"])
# Rotate and right-align the date labels.
fig = plt.gcf()
fig.autofmt_xdate()

### 2. Date vs UV

In [None]:
# UV index against date; one tick every 100 days, labelled dd/mm/yyyy.
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.DayLocator(interval=100))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d/%m/%Y'))
ax.plot(columns["date"], columns["UV Index"])
# Rotate and right-align the date labels.
fig = plt.gcf()
fig.autofmt_xdate()

### 3. Date vs Total Precipitation

In [None]:
# Total precipitation against date; one tick every 100 days, labelled dd/mm/yyyy.
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.DayLocator(interval=100))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d/%m/%Y'))
ax.plot(columns["date"], columns["Total Precipitation (MM)"])
# Rotate and right-align the date labels.
fig = plt.gcf()
fig.autofmt_xdate()

### 4. Date vs Pressure

In [None]:
# Average pressure against date; one tick every 100 days, labelled dd/mm/yyyy.
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.DayLocator(interval=100))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d/%m/%Y'))
ax.plot(columns["date"], columns["Avg Pressure (P)"])
# Rotate and right-align the date labels.
fig = plt.gcf()
fig.autofmt_xdate()

### 5. Date vs Humidity

In [None]:
# Average humidity against date; one tick every 100 days, labelled dd/mm/yyyy.
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.DayLocator(interval=100))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d/%m/%Y'))
ax.plot(columns["date"], columns["Avg Humidity (%)"])
# Rotate and right-align the date labels.
fig = plt.gcf()
fig.autofmt_xdate()


### 6. Date vs Avg Cloud Cover

In [None]:
# Average cloud cover against date; one tick every 100 days, labelled dd/mm/yyyy.
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.DayLocator(interval=100))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d/%m/%Y'))
ax.plot(columns["date"], columns["Avg Cloud Cover"])
# Rotate and right-align the date labels.
fig = plt.gcf()
fig.autofmt_xdate()

### 7. Date vs Avg resultant wind vector [E]

In [None]:
# East component of the average resultant wind vector against date;
# one tick every 100 days, labelled dd/mm/yyyy.
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.DayLocator(interval=100))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d/%m/%Y'))
ax.plot(columns["date"], columns["Avg Resultant Wind vector [E](km/h)"])
# Rotate and right-align the date labels.
fig = plt.gcf()
fig.autofmt_xdate()

### 8. Date vs Avg resultant wind vector [N]

In [None]:
# North component of the average resultant wind vector against date;
# one tick every 100 days, labelled dd/mm/yyyy.
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.DayLocator(interval=100))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%d/%m/%Y'))
ax.plot(columns["date"], columns["Avg Resultant Wind vector [N](km/h)"])
# Rotate and right-align the date labels.
fig = plt.gcf()
fig.autofmt_xdate()

## Importing pandas and numpy

In [None]:
import pandas as pd
import numpy as np

## Importing Data

In [None]:
# CSV base names under data/. The first entry is the only one whose "date"
# column is kept; it is dropped from every later frame.
files = [
    "Delhi",
    "East_20", "East_100", "East_500",
    "North_20", "North_100", "North_500",
    "South_20", "South_100", "South_500",
    "West_20", "West_100", "West_500",
]
# One DataFrame per entry of `files`, in the same order.
data = []

for position, name in enumerate(files):
    frame = pd.read_csv("data/" + name + ".csv")
    if position != 0:
        frame.pop("date")
    data.append(frame)

## Data manipulation

In [None]:
class TemperaturePrediction:
    """Build sliding-window regression matrices from a list of DataFrames.

    All frames are concatenated column-wise. The very first column of the
    concatenated table (the leading column of ``data[0]``) is skipped; every
    remaining column is a feature. Row ``r`` of ``X`` holds the features of
    table rows ``r .. r + days - 1`` laid side by side, and row ``r`` of ``Y``
    is the target column of ``data[0]`` at table row ``r + days``.

    Attributes:
        X: float array of shape ``(rows - days, n_features * days)``,
            standardized column-wise by ``standardize_data``.
        Y: float array of shape ``(rows - days, 1)`` with the raw targets.
        mean_, std_: the per-column statistics used to standardize ``X``.
    """

    def __init__(self, data, days, target_column="Avg Temp (c)"):
        """Assemble ``X`` and ``Y`` and standardize ``X``.

        Args:
            data: sequence of DataFrames; ``data[0]`` supplies the target
                column and its first column is excluded from the features.
            days: size of the window, i.e. how many consecutive rows feed
                one sample.
            target_column: name of the column in ``data[0]`` to predict.

        Raises:
            ValueError: if ``days`` is negative or exceeds the row count of
                ``data[0]``.
        """
        n_rows = data[0].shape[0]
        if not 0 <= days <= n_rows:
            raise ValueError(
                f"days must be between 0 and {n_rows} (rows available), got {days}"
            )

        # Concatenate all frames side by side in one call and drop column 0.
        data_cities = pd.concat(data, axis=1).to_numpy()
        features = data_cities[:, 1:].astype(float)

        # The feature count comes from the concatenated table itself, so
        # frames with differing numbers of columns are handled correctly.
        n_samples = n_rows - days
        n_features = features.shape[1]

        # Fill X one window offset at a time: the block of columns for offset
        # k holds the features of the row k steps after each sample's start.
        self.X = np.zeros((n_samples, n_features * days))
        for offset in range(days):
            start = offset * n_features
            self.X[:, start:start + n_features] = features[offset:offset + n_samples]

        # The target is the value on the row right after each window.
        self.Y = np.zeros((n_samples, 1))
        self.Y[:, :] = data[0][target_column][days:].to_numpy().reshape((-1, 1))

        self.standardize_data()

    def standardize_data(self):
        """Shift and scale each column of ``X`` to zero mean and unit std.

        The statistics are computed over every row of ``X`` and kept in
        ``mean_`` and ``std_`` so the same transform can be re-applied.
        """
        self.mean_ = np.mean(self.X, axis=0)
        self.std_ = np.std(self.X, axis=0)
        # Constant columns have zero std; divide by 1 so they become 0, not NaN.
        self.std_[self.std_ == 0] = 1
        self.X = (self.X - self.mean_) / self.std_

    # TODO: add a method that splits X and Y into train and test sets.
        

## Importing Sklearn

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge

## Linear Regression

In [None]:
# Fit ordinary least squares for every window size from 1 to 15 days and
# record the train/test RMSE of each fit.
train_rmse_LR, test_rmse_LR = [], []
days, models_LR = [], []
for i in range(1, 16):
    obj = TemperaturePrediction(data, i)
    x_train, x_test, y_train, y_test = train_test_split(
        obj.X, obj.Y, test_size=0.2, random_state=1
    )
    model = LinearRegression()
    model.fit(x_train, y_train)
    y_pred_test = model.predict(x_test)
    y_pred_train = model.predict(x_train)
    models_LR.append(model)
    print(i)  # progress indicator: window size just finished
    days.append(i)
    test_rmse_LR.append(mean_squared_error(y_test, y_pred_test) ** 0.5)
    train_rmse_LR.append(mean_squared_error(y_train, y_pred_train) ** 0.5)

# RMSE as a function of window size, train vs test.
for series, label in ((train_rmse_LR, "Train"), (test_rmse_LR, "Test")):
    plt.plot(days, series, label=label)
plt.xlabel("Window Day")
plt.ylabel("RMSE")
plt.legend()
plt.show()

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\etash\AppData\Local\Programs\Python\Python38\lib\site-packages\IPython\core\interactiveshell.py", line 3444, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\etash\AppData\Local\Temp/ipykernel_5016/1316242180.py", line 8, in <module>
    model = LinearRegression().fit(x_train, y_train)
  File "C:\Users\etash\AppData\Local\Programs\Python\Python38\lib\site-packages\sklearn\linear_model\_base.py", line 717, in fit
    self.coef_, self._residues, self.rank_, self.singular_ = linalg.lstsq(X, y)
  File "C:\Users\etash\AppData\Local\Programs\Python\Python38\lib\site-packages\scipy\linalg\basic.py", line 1204, in lstsq
    x, s, rank, info = lapack_func(a1, b1, lwork,
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\etash\AppData\Local\Programs\Python\Python38\lib\site-packages\IPython\core\interactiveshell.py", line 2

TypeError: object of type 'NoneType' has no len()

## Lasso

In [None]:
# Fit a Lasso model (default settings) for every window size from 1 to 15
# days and record the train/test RMSE of each fit.
train_rmse_LA, test_rmse_LA = [], []
days, models_LA = [], []
for i in range(1, 16):
    obj = TemperaturePrediction(data, i)
    x_train, x_test, y_train, y_test = train_test_split(
        obj.X, obj.Y, test_size=0.2, random_state=1
    )
    model = Lasso()
    model.fit(x_train, np.ravel(y_train))  # the target is flattened to 1-D
    y_pred_test = model.predict(x_test)
    y_pred_train = model.predict(x_train)
    models_LA.append(model)
    print(i)  # progress indicator: window size just finished
    days.append(i)
    test_rmse_LA.append(mean_squared_error(y_test, y_pred_test) ** 0.5)
    train_rmse_LA.append(mean_squared_error(y_train, y_pred_train) ** 0.5)

# RMSE as a function of window size, train vs test.
for series, label in ((train_rmse_LA, "Train"), (test_rmse_LA, "Test")):
    plt.plot(days, series, label=label)
plt.xlabel("Window Day")
plt.ylabel("RMSE")
plt.legend()
plt.show()

## Ridge

In [None]:
# Fit a Ridge model (default settings) for every window size from 1 to 15
# days and record the train/test RMSE of each fit.
train_rmse_R, test_rmse_R = [], []
days, models_R = [], []
for i in range(1, 16):
    obj = TemperaturePrediction(data, i)
    x_train, x_test, y_train, y_test = train_test_split(
        obj.X, obj.Y, test_size=0.2, random_state=1
    )
    model = Ridge()
    model.fit(x_train, np.ravel(y_train))  # the target is flattened to 1-D
    y_pred_test = model.predict(x_test)
    y_pred_train = model.predict(x_train)
    models_R.append(model)
    print(i)  # progress indicator: window size just finished
    days.append(i)
    test_rmse_R.append(mean_squared_error(y_test, y_pred_test) ** 0.5)
    train_rmse_R.append(mean_squared_error(y_train, y_pred_train) ** 0.5)

# RMSE as a function of window size, train vs test.
for series, label in ((train_rmse_R, "Train"), (test_rmse_R, "Test")):
    plt.plot(days, series, label=label)
plt.xlabel("Window Day")
plt.ylabel("RMSE")
plt.legend()
plt.show()

## MLP

In [None]:
from sklearn.neural_network import MLPRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

# Fit an MLP regressor (random_state=1, at most 500 iterations) for every
# window size from 1 to 15 days and record the train/test RMSE of each fit.
train_rmse_MLP, test_rmse_MLP = [], []
days, models_MLP = [], []
for i in range(1, 16):
    obj = TemperaturePrediction(data, i)
    x_train, x_test, y_train, y_test = train_test_split(
        obj.X, obj.Y, test_size=0.2, random_state=1
    )
    model = MLPRegressor(random_state=1, max_iter=500)
    model.fit(x_train, np.ravel(y_train))  # the target is flattened to 1-D
    y_pred_test = model.predict(x_test)
    y_pred_train = model.predict(x_train)
    models_MLP.append(model)
    print(i)  # progress indicator: window size just finished
    days.append(i)
    test_rmse_MLP.append(mean_squared_error(y_test, y_pred_test) ** 0.5)
    train_rmse_MLP.append(mean_squared_error(y_train, y_pred_train) ** 0.5)

# RMSE as a function of window size, train vs test.
for series, label in ((train_rmse_MLP, "Train"), (test_rmse_MLP, "Test")):
    plt.plot(days, series, label=label)
plt.xlabel("Window Day")
plt.ylabel("RMSE")
plt.legend()
plt.show()