## Importing some important libraries

In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import seaborn as sns
import math
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, cross_val_score,StratifiedKFold
import sklearn.metrics as sm
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Lasso
from sklearn.linear_model import BayesianRidge
from sklearn              import svm

%matplotlib inline

### loading the dataset into pandas dataframe

In [2]:
Dict = {'January':'01','February':'02','March':'03','April':'04','May':'05','June':'06',
        'July':'07','August':'08','September':'09','October':'10','November':'11','December':'12'}

def TemperatureByCountry(Country = ('India' , 'China'),  Month = 'May' , From_Date = '1743-11-01' , To_Date = '2013-01-01'):

    """"
    # Function Description : 
        This Function Takes tuple of one or more Countries.  
        Function Takes Particular Month of a year.  
        Function takes Timeframe(From_Date , End-Date).  
    # Function Parameters : 
            # Country : 
                    This Parameter takes tuples of Countries as Input.\n
                    Default is ('India' , 'China')
            
            # Month : 
                    This Parameter takes Month Name as Input.\n
                    Default is 'May'.
            # From_Date : 
                    This Parameter is the Starting Date of the Dataset.\n
                    Default is '1743-11-01'
            # To_Date : 
                    This Parameter is the Ending Date of the Dataset.\n
                    Default is 2013-01-01'
            
    """
   
    if isinstance(Country , str):
        Country = [Country,]
    
    File = pd.read_csv(r'dataset/Temperature/GlobalLandTemperaturesByCountry.csv')

    File.columns = ['Date' , 'Avg_Temperature' , 'Avg_Uncertainity','Country']

    File.dropna(subset = ['Avg_Temperature'] , inplace = True)

    File = File[File['Date'].between(From_Date,To_Date)]

    File = File[File['Date'].str.contains(Dict[Month])]
    #File[File['Date']] = File[File['Date'].str[:4]]
    
    File['Year'] = pd.DatetimeIndex(File['Date']).year

    File.reset_index(drop = True , inplace = True)
    
    New = pd.DataFrame
    Flag = 1

    for Ptr in Country:
        NewFile = File[File['Country'] == Ptr]

        NewFile = NewFile.drop(['Avg_Uncertainity','Country'],axis = 1)
        NewFile = NewFile.rename(columns = {'Avg_Temperature':Ptr})
        
        if(Flag == 1):
            New = NewFile
            Flag = 0
        else:
            #New = New.merge(NewFile)
            New = New.merge(NewFile)
    return New

def GetDataFrame(Country = ('India'), Month = 'May', From_Date = '1900-05-01', To_Date = '2013-12-01'):
    return TemperatureByCountry(Country, Month , From_Date, To_Date)


In [3]:
Country = 'China'
Month = 'May'
From_Date = '1743-11-01'
To_Date = '2013-01-01'
df = GetDataFrame((Country), Month, From_Date , To_Date)

In [21]:
df

Unnamed: 0,Date,China,Year
9749,1821-05-01,13.463,1821
9750,1822-05-01,12.932,1822
9751,1823-05-01,12.767,1823
9752,1833-05-01,12.857,1833
9753,1834-05-01,13.542,1834
...,...,...,...
9947,2008-05-01,14.874,2008
9948,2009-05-01,14.555,2009
9949,2010-05-01,14.293,2010
9950,2011-05-01,14.274,2011


In [24]:
X = df['Year'].values.reshape(-1, 1)
Y = df[Country]

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)
Data_list = [X_train, X_test, y_train, y_test]

In [7]:
Kf = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 42)
def Model_Selection(Model,Data_list, name):
    Model.fit(Data_list[0],Data_list[2])
    y_Pred = Model.predict(Data_list[1])

    MAE = round(sm.mean_absolute_error(y_test, y_Pred), 2)
    MSE = round(sm.mean_squared_error(y_test, y_Pred), 2)
    MeAE = round(sm.median_absolute_error(y_test, y_Pred), 2)
    Variance = round(sm.explained_variance_score(y_test, y_Pred), 2)
    R2_Score =  round(sm.r2_score(y_test, y_Pred), 2)

    
    frame = pd.DataFrame({'Model': [name], 'MeanAbsoluteError': [MAE], 'MeanSquaredError': [MSE], 'MedianAbsoluteError': [MeAE], 'Variance': [Variance], 'R2_score':[R2_Score]})
    return frame

In [20]:
model1 = LinearRegression()
model2 = svm.SVR()
model3 = ElasticNet()
model4 = Lasso(alpha=0.1)
model5 = BayesianRidge()
Model_data = pd.concat([
                       Model_Selection(model1, Data_list, 'LinearRegression'),
                       Model_Selection(model2, Data_list, 'SupportVectorRegression'),
                       Model_Selection(model3, Data_list, 'ElasticNet'),
                       Model_Selection(model4, Data_list, 'Lasso'),
                       Model_Selection(model5, Data_list, 'BaysianRidge')],axis = 0).reset_index()
Model_data

Unnamed: 0,index,Model,MeanAbsoluteError,MeanSquaredError,MedianAbsoluteError,Variance,R2_score
0,0,LinearRegression,2.37,19.26,1.14,-0.05,-0.06
1,0,SupportVectorRegression,1.68,17.74,0.4,0.04,0.02
2,0,ElasticNet,2.36,19.23,1.13,-0.05,-0.06
3,0,Lasso,2.36,19.25,1.14,-0.05,-0.06
4,0,BaysianRidge,2.24,18.65,1.07,-0.02,-0.03


In [13]:
Model = svm.SVR()
Model.fit(Data_list[0],Data_list[2])
y_Pred = Model.predict([[2024], [2025], [1834]])
y_Pred

array([14.19811149, 14.19978387, 12.83810952])