In [17]:
import os  # imported here because the shared import cell runs after this one

# Pre-signed Kaggle download URL for station_day.csv.zip. Its query string
# carries X-Goog-Date=20220929T144621Z and X-Goog-Expires=259200 (seconds), so
# this particular link is time-limited. Set the STATION_DAY_URL environment
# variable to a fresh link instead of editing the notebook; the original value
# is kept as the fallback so existing behaviour is unchanged when it is unset.
Link = os.environ.get('STATION_DAY_URL', 'https://storage.googleapis.com/kaggle-data-sets/961564/1629646/compressed/station_day.csv.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20220929%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20220929T144621Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=a2f0499d08b759f4b5f8224e2d2a006b8059ba95b3240e5799a734a2f9655c8cc7af4b49cbe7c7672c2a428eccf7b1f2a7a40201b1ca8eefd34d7816eeb43dd71aba9a2e61e67583b7a241387b6ff7b43de45c1b1422af55aad8a4eb8422a888fd8fc6790898bb1c2dcd621de1baaaefd1fa4c21bb24dce60fdad17c4c0d67667a9846e019f345769e4ca11c24e490d3e73d46027f18cb43f2a9adad3f56a9eac58d29f4f6c85f4d7b3a3f62f9d84e329ce71c47057e84d6690e3b91dce55aa3a46517d8cc9a5703736ba131021e21252003866b93eabe4addc033e50bf67d1319f1b7f816e58a85ce0321b7e992aeb0ab335137b9aad5f9eac436bf0b92441b')

In [18]:
import os
import time
import wget 
import shutil
import pandas as pd 
import import_ipynb
import Configuration as C 

In [19]:
def Download_Dataset_No_Nulls(Link:str)->str:
    """Download the zipped dataset, clean it, and return the cleaned CSV's path.

    Steps: make sure the data folder exists (``C.CheckDataFolder``), download
    the archive at ``Link`` with ``wget``, unpack it into ``C.DataFolder``,
    delete the archive, then rewrite the CSV found there with the
    ``StationId``, ``Date`` and ``AQI`` columns removed and every row that
    contains a missing value dropped.

    Parameters
    ----------
    Link : str
        URL of the archive to download.

    Returns
    -------
    str
        Path of the cleaned CSV inside ``C.DataFolder``.

    Raises
    ------
    FileNotFoundError
        If ``C.DataFolder`` contains no ``.csv`` file after unpacking.
    KeyError
        If the CSV lacks any of the ``StationId``, ``Date`` or ``AQI`` columns.
    """
    C.CheckDataFolder()
    ArchiveName = wget.download(Link)
    try:
        shutil.unpack_archive(ArchiveName, C.DataFolder)
    finally:
        # Remove the downloaded archive even when unpacking fails.
        os.remove(ArchiveName)

    # Sorted so the choice is deterministic if the folder holds several CSVs.
    CsvNames = sorted(x for x in os.listdir(C.DataFolder) if x.endswith(".csv"))
    if not CsvNames:
        raise FileNotFoundError(f'No .csv file found in {C.DataFolder} after unpacking {ArchiveName}')
    # os.path.join instead of a hard-coded backslash keeps the path portable.
    DataFileName = os.path.join(C.DataFolder, CsvNames[0])

    df = pd.read_csv(DataFileName)
    df = df.drop(columns=['StationId', 'Date', 'AQI'])
    # A previous run may have saved the row index as an 'Unnamed: 0' column;
    # drop it when present rather than swallowing every exception.
    df = df.drop(columns=['Unnamed: 0'], errors='ignore')
    df = df.dropna()
    # index=False: do not write the row index, otherwise it is read back as an
    # extra 'Unnamed: 0' column by later pd.read_csv calls.
    df.to_csv(DataFileName, index=False)
    return DataFileName

In [20]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
En = LabelEncoder()

In [22]:
# Prints the class documentation for RandomForestClassifier (constructor
# signature and parameter descriptions).
# NOTE(review): looks like leftover exploration; nothing else in the visible
# cells uses its output — confirm before removing.
help(RandomForestClassifier)

Help on class RandomForestClassifier in module sklearn.ensemble._forest:

class RandomForestClassifier(ForestClassifier)
 |  RandomForestClassifier(n_estimators=100, *, criterion='gini', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='sqrt', max_leaf_nodes=None, min_impurity_decrease=0.0, bootstrap=True, oob_score=False, n_jobs=None, random_state=None, verbose=0, warm_start=False, class_weight=None, ccp_alpha=0.0, max_samples=None)
 |  
 |  A random forest classifier.
 |  
 |  A random forest is a meta estimator that fits a number of decision tree
 |  classifiers on various sub-samples of the dataset and uses averaging to
 |  improve the predictive accuracy and control over-fitting.
 |  The sub-sample size is controlled with the `max_samples` parameter if
 |  `bootstrap=True` (default), otherwise the whole dataset is used to build
 |  each tree.
 |  
 |  Read more in the :ref:`User Guide <forest>`.
 |  
 |  Parameters
 |  ----------


In [26]:
from sklearn_nature_inspired_algorithms.model_selection import NatureInspiredSearchCV

In [28]:
def Select_NIS_model(val:int=1):
    """Build a nature-inspired hyper-parameter search over a random forest.

    Parameters
    ----------
    val : int, default 1
        Selector for the search algorithm:
        1 -> 'ba'    (Bat Algorithm)
        2 -> 'hba'   (Hybrid Bat Algorithm)
        3 -> 'fa'    (Firefly Algorithm)
        4 -> 'hsaba' (Hybrid Self Adaptive Bat Algorithm)
        any other value -> 'gwo' (Grey Wolf Optimizer)

    Returns
    -------
    tuple
        ``(NatureInspiredSearchCV, str)``: the unfitted search object wrapping
        a fresh ``RandomForestClassifier``, and the algorithm's display name.

    Notes
    -----
    The search space is ``n_estimators`` 100..900 step 100,
    ``min_samples_split`` 2..18 step 2 and ``min_samples_leaf`` 1..9.

    NOTE(review): an earlier note in this function listed per-algorithm
    defaults (alpha=1, betamin=1, gamma=2 for 'ba'; A=0.9, r=0.1, Qmin=0.0,
    Qmax=2.0 for 'hba', 'fa' and 'hsaba'). None of those values are passed to
    the search here; confirm them against the library documentation before
    relying on them.
    """
    # Selector -> (library algorithm code, display name).
    algorithms = {
        1: ('ba', 'Bat Algorithm'),
        2: ('hba', 'Hybrid Bat Algorithm'),
        3: ('fa', 'Firefly Algorithm'),
        4: ('hsaba', 'Hybrid Self Adaptive Bat Algorithm'),
    }
    # Unknown selectors fall back to Grey Wolf, as in the original else-branch.
    algorithm, name = algorithms.get(val, ('gwo', 'Grey Wolf Optimizer'))

    model = RandomForestClassifier()
    param_grid = {'n_estimators':list(range(100,1000,100)), 'min_samples_split':list(range(2,20,2)), 'min_samples_leaf':list(range(1,10,1))}
    return NatureInspiredSearchCV(estimator=model, param_grid=param_grid, algorithm=algorithm), name

In [29]:
def Model_try(DataFileName:str,model_val:int):
    """Run one nature-inspired hyper-parameter search and report its results.

    Reads the CSV at ``DataFileName``, treats the last column as the label and
    all other columns as features, label-encodes the target, and holds out the
    final 20% of rows (no shuffling) as a test set. The search returned by
    ``Select_NIS_model(model_val)`` is fitted on the rest and scored on the
    hold-out. The search's ``cv_results_`` are written to
    ``"<algorithm name>_results.csv"`` in the working directory.

    Parameters
    ----------
    DataFileName : str
        Path of the cleaned CSV.
    model_val : int
        Algorithm selector forwarded to ``Select_NIS_model``.

    Returns
    -------
    tuple
        ``(MyDict, NIA_Name)``. ``MyDict`` has the keys 'Algo Name',
        'Train Time', 'Test Time', 'Best Estimator', 'Best Para', 'Best Score'
        and 'Test Score' (the hold-out score that is also printed).
        ``NIA_Name`` is the algorithm's display name.
    """
    data = pd.read_csv(DataFileName)
    # A CSV saved with its row index comes back with an 'Unnamed: 0' column;
    # it is not a real feature, so drop it when present.
    data = data.drop(columns=['Unnamed: 0'], errors='ignore')
    data_cols = list(data.columns)
    X = data[data_cols[:-1]]
    # Use a local encoder rather than re-fitting a shared module-level one.
    y = LabelEncoder().fit_transform(data[data_cols[-1]])

    # shuffle=False keeps file order: the last 20% of rows form the test set.
    # random_state is retained from the original call; per the scikit-learn
    # docs it only controls shuffling, so it has no effect here.
    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,shuffle=False,random_state=1412)
    model, NIA_Name = Select_NIS_model(model_val)

    # perf_counter is monotonic, so the durations cannot be skewed by clock changes.
    start = time.perf_counter()
    model.fit(X_train,y_train)
    train_time = time.perf_counter() - start

    start = time.perf_counter()
    Score = model.score(X_test,y_test)
    test_time = time.perf_counter() - start

    print(f'For {NIA_Name} we get a score of {Score}.\nThe Training time is {train_time}.\nThe Testing Time is {test_time}.')

    MyDict = { 'Algo Name' : NIA_Name, 'Train Time' : train_time, 'Test Time' : test_time, 'Best Estimator' : model.best_estimator_, 'Best Para' : model.best_params_, 'Best Score' : model.best_score_, 'Test Score' : Score }
    cv_results = pd.DataFrame(model.cv_results_)
    cv_results.to_csv(f"{NIA_Name}_results.csv")
    return MyDict, NIA_Name
    #https://github.com/Suji04/NormalizedNerd/blob/master/Scikit-Learn-Videos/GridSearchCV/GridSearchCV.ipynb


In [30]:
# Download and clean the dataset; DataFile holds the path returned by
# Download_Dataset_No_Nulls and is what the Model_try calls below read from.
DataFile =  Download_Dataset_No_Nulls(Link)


In [31]:
# Run each search algorithm once and collect one summary row per run.
# Selectors 0-4 cover all five algorithms: Select_NIS_model maps 1-4 to the
# bat/firefly variants and any other value (here 0) to Grey Wolf Optimizer.
score_lst = []
train_lst = []
test_lst = []
name_lst = []
for i in range(5):
    # Model_try returns (result dict, algorithm name), not four separate values.
    result, name = Model_try(DataFile,i)
    # 'Best Score' is the search's best cross-validated score.
    score_lst.append(result['Best Score'])
    train_lst.append(result['Train Time'])
    test_lst.append(result['Test Time'])
    name_lst.append(name)
df = pd.DataFrame(data= { 'NIA' : name_lst, 'Accuracy' : score_lst, 'Training Time' : train_lst, 'Testing Time' : test_lst } )

Fitting at most 729 candidates


KeyboardInterrupt: 

Basement!!

Don't go down there!

In [None]:
# Deliberate stop so that "Run All" does not continue into the cells below.
# A bare `return` outside a function is a SyntaxError; raising SystemExit is
# valid Python and states the intent explicitly.
raise SystemExit("Stopping here: the cells below this point are not part of the main run.")

In [None]:
def Model_try_Basic(DataFileName:str):
    """Fit a default RandomForestClassifier as a baseline and return its score.

    Reads the CSV at ``DataFileName``, treats the last column as the label and
    all other columns as features, label-encodes the target, and makes a
    shuffled 80/20 train/test split seeded with 1412.

    Parameters
    ----------
    DataFileName : str
        Path of the cleaned CSV.

    Returns
    -------
    float
        ``model.score`` on the held-out 20% (also printed).
    """
    data = pd.read_csv(DataFileName)
    # A CSV saved with its row index comes back with an 'Unnamed: 0' column;
    # it is not a real feature, so drop it when present.
    data = data.drop(columns=['Unnamed: 0'], errors='ignore')
    data_cols = list(data.columns)
    X = data[data_cols[:-1]]
    # Use a local encoder rather than re-fitting a shared module-level one.
    y = LabelEncoder().fit_transform(data[data_cols[-1]])
    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,shuffle=True,random_state=1412)
    # Seed the forest with the same value as the split so reruns are repeatable.
    model = RandomForestClassifier(random_state=1412)
    model.fit(X_train,y_train)
    Score = model.score(X_test,y_test)
    print(f'For the basic RandomForestClassifier we get a score of {Score}')
    return Score
