In [1]:
# Signed Kaggle download URL for station_day.csv.zip, consumed by Download().
# NOTE(review): the query string carries X-Goog-Date=20220929T144621Z and
# X-Goog-Expires=259200, so this looks like a time-limited link that has
# probably lapsed — regenerate it before use and avoid committing signed URLs.
Link = 'https://storage.googleapis.com/kaggle-data-sets/961564/1629646/compressed/station_day.csv.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20220929%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20220929T144621Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=a2f0499d08b759f4b5f8224e2d2a006b8059ba95b3240e5799a734a2f9655c8cc7af4b49cbe7c7672c2a428eccf7b1f2a7a40201b1ca8eefd34d7816eeb43dd71aba9a2e61e67583b7a241387b6ff7b43de45c1b1422af55aad8a4eb8422a888fd8fc6790898bb1c2dcd621de1baaaefd1fa4c21bb24dce60fdad17c4c0d67667a9846e019f345769e4ca11c24e490d3e73d46027f18cb43f2a9adad3f56a9eac58d29f4f6c85f4d7b3a3f62f9d84e329ce71c47057e84d6690e3b91dce55aa3a46517d8cc9a5703736ba131021e21252003866b93eabe4addc033e50bf67d1319f1b7f816e58a85ce0321b7e992aeb0ab335137b9aad5f9eac436bf0b92441b'

In [2]:
import os
import json
import wget 
import shutil
import numpy as np
import pandas as pd 

In [3]:
# Home is the kernel's working directory at the time this cell runs;
# DataFolder is the 'Data' sub-directory where the dataset is unpacked and read.
Home = os.getcwd()
DataFolder = os.path.join(Home,'Data')

def CheckDataFolder():
    """Make sure the ``DataFolder`` directory exists.

    ``os.makedirs(..., exist_ok=True)`` replaces the exists-then-mkdir pair:
    it is idempotent, has no window between the check and the creation, and
    also creates missing parent directories.
    """
    os.makedirs(DataFolder, exist_ok=True)


In [4]:
def Download(Link:str):
    """Download the archive at ``Link`` and unpack it into ``DataFolder``.

    Args:
        Link: URL of an archive that ``shutil.unpack_archive`` can handle.

    The archive is fetched with ``wget.download`` (which returns the name of
    the saved file), unpacked into ``DataFolder`` and then deleted.
    """
    CheckDataFolder()
    FileName = wget.download(Link)
    try:
        shutil.unpack_archive(FileName,DataFolder)
    finally:
        # Delete the downloaded archive even when unpacking fails, so a bad
        # download does not leave a stray file behind.
        os.remove(FileName)

In [5]:
def Download_Dataset_No_Nulls(Link:str,Is_There:bool=True)->str:
    """Optionally download the dataset, then strip unused columns and null rows.

    Args:
        Link: Archive URL handed to ``Download`` when the data is not local yet.
        Is_There: ``True`` (default) when the CSV already sits in ``DataFolder``;
            ``False`` to download and unpack it first.

    Returns:
        Path of the first ``.csv`` file found in ``DataFolder``. The file is
        rewritten in place without the 'StationId', 'Date', 'AQI' and
        'Unnamed: 0' columns and without rows that contain nulls.

    Raises:
        FileNotFoundError: if ``DataFolder`` contains no ``.csv`` file.
    """
    if not Is_There:
        Download(Link)

    csv_names = [x for x in os.listdir(DataFolder) if x.endswith(".csv")]
    if not csv_names:
        raise FileNotFoundError(f'No .csv file found in {DataFolder}')
    # os.path.join instead of a hard-coded '\\' keeps the path valid off Windows.
    DataFileName = os.path.join(DataFolder, csv_names[0])

    try:
        df = pd.read_csv(DataFileName)
        # errors='ignore' makes the clean-up idempotent: a column that is
        # already gone (or an index column that was never written) no longer
        # raises KeyError and aborts the whole clean-up.
        df = df.drop(columns=['StationId', 'Date', 'AQI', 'Unnamed: 0'], errors='ignore')
        df = df.dropna()
        # index=False stops to_csv from adding a fresh unnamed index column,
        # which would otherwise end up among the features on the next read.
        df.to_csv(DataFileName, index=False)
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError) as err:
        # Stay best-effort like the original, but say what went wrong.
        print(f'Could not clean {DataFileName}: {err}')
    return DataFileName

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
# Module-level label encoder; Model_try refits it on the target column on every call.
En = LabelEncoder()

In [7]:
from sklearn_nature_inspired_algorithms.model_selection import NatureInspiredSearchCV

In [8]:
def Select_NIS_model(val:int=1,n_estimators:int=100):
    """Build a nature-inspired hyper-parameter search around a random forest.

    Args:
        val: Selector for the search algorithm:
            1 -> 'ba'    (Bat Algorithm)
            2 -> 'hba'   (Hybrid Bat Algorithm)
            3 -> 'fa'    (Firefly Algorithm)
            4 -> 'hsaba' (Hybrid Self Adaptive Bat Algorithm)
            any other value, including 0 -> 'gwo' (Grey Wolf Optimizer)
        n_estimators: The single ``n_estimators`` value placed in the
            parameter grid.

    Returns:
        ``(search, name)``: an unfitted ``NatureInspiredSearchCV`` wrapping a
        fresh ``RandomForestClassifier``, and the algorithm's display name.

    NOTE(review): this function used to end with an unreachable string that
    listed per-algorithm hyper-parameters; the 'ba' and 'fa' entries looked
    swapped, so check the library documentation rather than relying on it.
    """
    algorithms = {
        1: ('ba', 'Bat Algorithm'),
        2: ('hba', 'Hybrid Bat Algorithm'),
        3: ('fa', 'Firefly Algorithm'),
        4: ('hsaba', 'Hybrid Self Adaptive Bat Algorithm'),
    }
    # Unknown selectors keep the original fallback to Grey Wolf Optimizer.
    algorithm, display_name = algorithms.get(val, ('gwo', 'Grey Wolf Optimizer'))

    model = RandomForestClassifier()
    param_grid = {'n_estimators':[n_estimators]}
    return NatureInspiredSearchCV(estimator=model, param_grid=param_grid, algorithm=algorithm), display_name

In [9]:
def Model_try(DataFileName:str,model_val:int,n_estimators:int):
    """Fit the selected search on a CSV and report its best score.

    The last column of the file is taken as the label and every other column
    as a feature. Labels are encoded with the module-level ``En`` encoder and
    the search is fitted on the full data set (no hold-out split is made).

    Args:
        DataFileName: Path of the CSV to load.
        model_val: Algorithm selector forwarded to ``Select_NIS_model``.
        n_estimators: Forest size forwarded to ``Select_NIS_model``.

    Returns:
        ``(algorithm display name, best_score_ of the fitted search)``.
    """
    frame = pd.read_csv(DataFileName)
    column_names = list(frame.columns)
    raw_labels = frame[column_names[-1]]
    features = frame[column_names[:-1]]
    labels = En.fit(raw_labels).transform(raw_labels)

    search, NIA_Name = Select_NIS_model(model_val,n_estimators)
    search.fit(features,labels)

    best = search.best_score_
    print(f'For {NIA_Name} we get a score of {best}.')
    return NIA_Name, best


In [10]:
#DataFile =  Download_Dataset_No_Nulls(Link)
# Resolve the dataset relative to DataFolder rather than an absolute path into
# one user's profile, so the notebook also runs from other checkouts.
DataFile = os.path.join(DataFolder, 'station_day.csv')

In [11]:
from datetime import datetime as dt 

In [12]:
def Get_hms(val):
    """Split a millisecond count into (hour, minute, second).

    Args:
        val: Number of milliseconds (int or float). Sub-second precision is
            discarded.

    Returns:
        Tuple ``(hour, minute, second)`` of plain ints, with hour wrapped
        modulo 24, minute in 0..59 and second in 0..59.

    Integer arithmetic is used instead of chained float divisions so the
    result stays exact for arbitrarily large inputs; the unused and
    mis-computed ``ms`` local has been removed.
    NOTE(review): the hour is whatever the timestamp's own clock implies; no
    time-zone conversion happens here.
    """
    total_seconds = int(val // 1000)
    total_minutes, sec = divmod(total_seconds, 60)
    total_hours, mini = divmod(total_minutes, 60)
    return total_hours % 24, mini, sec
    

In [13]:
def Check_Time(df_timestamp,h,m,s):
    """Tell whether a millisecond timestamp falls on exactly ``h:m:s``.

    The timestamp is split with ``Get_hms`` and compared field by field;
    returns ``True`` only when hour, minute and second all match.
    """
    return Get_hms(df_timestamp) == (h, m, s)


In [14]:
def Get_req_start_Idx(s_h,s_m,s_s,prev_idx,flag=True):
    """Find the first row of ``energy_file`` whose timestamp matches a time.

    Args:
        s_h, s_m, s_s: Hour, minute and second to look for.
        prev_idx: Row index at which the scan starts.
        flag: When true, a failed scan is retried once from row 0 (the file
            is read again for that second pass).

    Returns:
        Index of the first matching row, or -1 when nothing matches.
    """
    first_row = prev_idx
    may_wrap = flag
    while True:
        stamps = pd.read_csv(energy_file)['TimeStamp (ms)']
        for row in range(first_row, len(stamps)):
            if Check_Time(stamps[row], s_h, s_m, s_s):
                return row
        if not may_wrap:
            return -1
        # Second and last pass: start over from the top of the log.
        first_row, may_wrap = 0, False


In [19]:
def Cal_Energy(start_idx,e_h,e_m,e_s):
    """Accumulate the power columns of ``energy_file`` from a row up to a time.

    Rows are read from ``start_idx`` onward and each row's total, CPU,
    monitor, disk and base readings are added to running sums. The scan stops
    after (and including) the first row whose timestamp matches
    ``e_h:e_m:e_s``.

    Returns:
        ``(total, cpu, monitor, disk, base, idx)`` where ``idx`` is the
        matching row index, or the number of rows when no row matched.

    NOTE(review): the sums are over watt readings; they only equal joules if
    consecutive rows are one second apart — confirm the logging interval.
    """
    power_columns = (' Total Power (W)', ' CPU (W)', ' Monitor (W)', ' Disk (W)', ' Base (W)')
    sums = [0] * len(power_columns)
    log = pd.read_csv(energy_file)
    row_count = len(log)

    for row in range(start_idx, row_count):
        for slot, column in enumerate(power_columns):
            sums[slot] += log[column][row]
        if Check_Time(log['TimeStamp (ms)'][row], e_h, e_m, e_s):
            return (*sums, row)
    return (*sums, row_count)

In [20]:
def Get_Time_Energy(start_time,end_time,prev_idx=0):
    """Measure elapsed time and accumulated power readings for one run.

    Args:
        start_time: ``datetime`` taken just before the run.
        end_time: ``datetime`` taken just after the run.
        prev_idx: Row of the power log at which the search for the start
            time begins.

    Returns:
        ``(total, cpu, monitor, disk, base, time_taken, end_idx)`` where
        ``time_taken`` is the elapsed whole seconds and ``end_idx`` is the row
        index reported by ``Cal_Energy``. Returns ``None`` (after printing a
        message) when the start time cannot be located in the log.
    """
    # Subtract the datetimes instead of differencing their day/hour/minute/
    # second fields: the field arithmetic goes wrong when a run crosses a
    # month boundary (end.day - start.day becomes negative). Microseconds are
    # dropped first so the result matches the old value in every other case.
    elapsed = end_time.replace(microsecond=0) - start_time.replace(microsecond=0)
    time_taken = elapsed.days * 86400 + elapsed.seconds

    start_idx = Get_req_start_Idx(start_time.hour, start_time.minute, start_time.second, prev_idx)
    if start_idx == -1:
        print("FAILED TO FIND START IDX")
        return

    total,cpu,monitor,disk,base,end_idx = Cal_Energy(start_idx, end_time.hour, end_time.minute, end_time.second)

    return total,cpu,monitor,disk,base, time_taken, end_idx


In [21]:
# Power log to analyse: the first *.csv that os.listdir() reports in the
# working directory (raises IndexError when there is none).
# NOTE(review): later cells write 'Full_Result.csv' and 'Final Result.csv' into
# this same directory, so a re-run may pick up a different file — consider
# naming the log file explicitly.
energy_file = [ os.path.join(os.getcwd(),x) for x in os.listdir() if x.endswith('.csv')][0]
energy_file


'c:\\Users\\chand\\Desktop\\work_simple\\Green-Computing\\temp\\dazhs.csv'

In [23]:
# Benchmark every search algorithm (selectors 0-4) for n_estimators = 50..500,
# recording the score together with the power readings logged during the run.
MyDict = {}
My_prev_idx = 0
for M_type in range(5):
    print(M_type)
    for Para in range(50,501,50):
        print(Para)
        start_time = dt.now()
        Name, Score = Model_try(DataFile,M_type,Para)
        end_time = dt.now()
        # Unpacks: total, cpu, monitor, disk, base, time_taken, end_idx
        (total, cpu, monitor, disk, base,
         time_taken, My_prev_idx) = Get_Time_Energy(start_time,end_time,prev_idx=My_prev_idx)
        TempDict = {
            'Total Energy (J)' : total,
            'Total CPU  Energy (J)' : cpu,
            'Total Monitor Energy (J)' : monitor,
            'Total Disk Energy (J)' : disk,
            'Base Energy (J)' : base,
            'Time (Sec)' : time_taken,
            'Score' : Score*100,
            'n_estimators' : Para,
        }
        MyDict.setdefault(Name, []).append(TempDict)
        # Rewrite the results file after every run so that partial progress
        # is kept if the loop is interrupted.
        with open('Kaito Kids Dairy.json','w') as kid:
            json.dump(MyDict,kid)


4
50
Fitting at most 1 candidates
Optimization finished, 1 candidates were fitted
For Hybrid Self Adaptive Bat Algorithm we get a score of 0.7152226682959144.
100
Fitting at most 1 candidates
Optimization finished, 1 candidates were fitted
For Hybrid Self Adaptive Bat Algorithm we get a score of 0.7160952310652844.
150
Fitting at most 1 candidates
Optimization finished, 1 candidates were fitted
For Hybrid Self Adaptive Bat Algorithm we get a score of 0.722885884173275.
200
Fitting at most 1 candidates
Optimization finished, 1 candidates were fitted
For Hybrid Self Adaptive Bat Algorithm we get a score of 0.7235633791625862.
250
Fitting at most 1 candidates
Optimization finished, 1 candidates were fitted
For Hybrid Self Adaptive Bat Algorithm we get a score of 0.7223025614576344.
300
Fitting at most 1 candidates
Optimization finished, 1 candidates were fitted
For Hybrid Self Adaptive Bat Algorithm we get a score of 0.7203627442637426.
350
Fitting at most 1 candidates
Optimization finish

In [28]:
# Reload the benchmark results from the JSON file written by the experiment
# loop; MyDict starts empty and is replaced by the parsed content.
MyDict = dict()
with open('Kaito Kids Dairy.json','r') as kid:
    MyDict = json.load(kid)


In [29]:
MyDict

{'Grey Wolf Optimizer': [{'Total Energy (J)': 49.9,
   'Total CPU  Energy (J)': 35.1,
   'Total Monitor Energy (J)': 0.0,
   'Total Disk Energy (J)': 0.0,
   'Base Energy (J)': 1.5,
   'Time (Sec)': 540,
   'Score': 71.92972294169171,
   'n_estimators': 50},
  {'Total Energy (J)': 58.999999999999986,
   'Total CPU  Energy (J)': 35.49999999999999,
   'Total Monitor Energy (J)': 0.0,
   'Total Disk Energy (J)': 0.0,
   'Base Energy (J)': 1.5,
   'Time (Sec)': 960,
   'Score': 70.94045801670276,
   'n_estimators': 100},
  {'Total Energy (J)': 121.39999999999999,
   'Total CPU  Energy (J)': 72.39999999999999,
   'Total Monitor Energy (J)': 0.0,
   'Total Disk Energy (J)': 0.0,
   'Base Energy (J)': 1.5,
   'Time (Sec)': 1980,
   'Score': 71.90058266449707,
   'n_estimators': 150},
  {'Total Energy (J)': 164.90000000000003,
   'Total CPU  Energy (J)': 100.1,
   'Total Monitor Energy (J)': 0.0,
   'Total Disk Energy (J)': 0.0,
   'Base Energy (J)': 1.5,
   'Time (Sec)': 2640,
   'Score': 72.

In [31]:
# Top-level keys of the results dict (one per algorithm display name).
key_lst = list(MyDict.keys())
key_lst

['Grey Wolf Optimizer',
 'Bat Algorithm',
 'Hybrid Bat Algorithm',
 'Firefly Algorithm',
 'Hybrid Self Adaptive Bat Algorithm']

In [32]:
len(MyDict[key_lst[0]])

10

In [33]:
# Field names stored for each run, taken from the first run of the first algorithm.
inner_key_lst = list(MyDict[key_lst[0]][0].keys())
inner_key_lst

['Total Energy (J)',
 'Total CPU  Energy (J)',
 'Total Monitor Energy (J)',
 'Total Disk Energy (J)',
 'Base Energy (J)',
 'Time (Sec)',
 'Score',
 'n_estimators']

In [34]:
# Flatten MyDict (algorithm name -> list of run records) into one table row
# per run and save it as 'Full_Result.csv'.
name_lst, n_estimator_lst, Time_lst, Acc_lst = [], [], [], []
TE_lst, B_lst, CPU_lst, Mon_lst, Disk_lst = [], [], [], [], []

for algo_name, runs in MyDict.items():
    for run in runs:
        name_lst.append(algo_name)
        TE_lst.append(run['Total Energy (J)'])
        CPU_lst.append(run['Total CPU  Energy (J)'])
        Mon_lst.append(run['Total Monitor Energy (J)'])
        Disk_lst.append(run['Total Disk Energy (J)'])
        # Stored seconds are floor-divided by 60 before being tabulated.
        whole_minutes = run['Time (Sec)']//60
        Time_lst.append(whole_minutes)
        # The base column is the stored base value scaled by that figure.
        B_lst.append(run['Base Energy (J)']*whole_minutes)
        Acc_lst.append(run['Score'])
        n_estimator_lst.append(run['n_estimators'])

Final_dict = {
    'Algo Name':name_lst,
    'n_estimators' : n_estimator_lst,
    'Time Taken' : Time_lst,
    'Accuracy' : Acc_lst,
    'Total Energy (J)':TE_lst,
    'Base Energy (J)' : B_lst,
    'CPU Energy (J)' : CPU_lst,
    'Monitor Energy (J)' : Mon_lst,
    'Disk Energy (J)' : Disk_lst,
}

df = pd.DataFrame(data=Final_dict)

df.to_csv('Full_Result.csv')

"\n['Total Energy (J)',\n 'Total CPU  Energy (J)',\n 'Total Monitor Energy (J)',\n 'Total Disk Energy (J)',\n 'Base Energy (J)',\n 'Time (Sec)',\n 'Score',\n 'n_estimators']\n "

In [None]:
# NOTE(review): a bare top-level `return` is a SyntaxError (see the error
# below); presumably a deliberate barrier so "Run All" stops before the
# scratch cells that follow — confirm, and consider a clearer stop.
return

SyntaxError: 'return' outside function (3438313781.py, line 1)

Basement!

Don't go down there!!

There are only faulty or once-used code chunks!!!

In [None]:
# Scratch cell: resets the four result lists and then indexes [-1] on each.
# Because the lists were just emptied, the print raises IndexError (see below).
score_lst = []
estimator_lst = []
para_lst = []
name_lst = []

print(score_lst[-1],estimator_lst[-1],para_lst[-1],name_lst[-1])

IndexError: list index out of range

In [None]:
# Scratch cell written against an older Model_try.
# NOTE(review): it calls Model_try with two arguments and expects
# (result dict, name) back, whereas the Model_try defined in this notebook
# takes three arguments and returns (name, best score) — it will not run as is.
score_lst = []
estimator_lst = []
para_lst = []
name_lst = []
MyDict = dict()
for i in range(5):
    ADict, NIA_Name = Model_try(DataFile,i)
    MyDict[NIA_Name] = ADict
    para_lst.append(ADict['Best Para'])
    score_lst.append(ADict['Best Score'])
    estimator_lst.append(ADict['Best Estimator'])
    name_lst.append(NIA_Name)


Fitting at most 10 candidates
Optimization finished, 10 candidates were fitted
For Grey Wolf Optimizer we get a score of 0.7256973238242688.
Fitting at most 10 candidates
Optimization finished, 10 candidates were fitted
For Bat Algorithm we get a score of 0.7264726583050966.
Fitting at most 10 candidates
Optimization finished, 10 candidates were fitted
For Hybrid Bat Algorithm we get a score of 0.7237589641143928.
Fitting at most 10 candidates
Optimization finished, 10 candidates were fitted
For Firefly Algorithm we get a score of 0.7248248080705121.
Fitting at most 10 candidates
Optimization finished, 10 candidates were fitted
For Hybrid Self Adaptive Bat Algorithm we get a score of 0.7260858138379175.


In [None]:
# Scratch cell: blanks every 'Best Estimator' entry and then dumps MyDict to
# "NIA_Dicts.json" (presumably because the estimator objects are not
# JSON-serialisable — confirm). k2_lst is computed but not used in this cell.
k_lst = list(MyDict.keys())
k2_lst = list(MyDict[k_lst[0]].keys())
for i in k_lst:
  MyDict[i]['Best Estimator'] = None 

with open("NIA_Dicts.json", "w") as outfile:
    json.dump(MyDict,outfile) 

In [None]:
# Scratch cell: tabulate the name/score/estimator/parameter lists, one row per entry.
df = pd.DataFrame(data= { 'NIA' : name_lst, 'Accuracy' : score_lst, 'Estimator' : estimator_lst, 'Para' : para_lst } )

In [None]:
# Scratch cell: write the current df to 'Final Result.csv' in the working
# directory (the row index is written as an extra first column).
df.to_csv('Final Result.csv')

DON'T BE UP RUNNING IT PAST 00:00!!
YOU SHOULD BE IN BED BY THEN!!!!!!!

Here is a bedtime story for you:


She carefully assured her child that there wasn't a monster under the bed, turned off the light and left.










Thanks to her, I now get to enjoy the feast I'd been craving for so long.
