In [1]:
from sklearn.preprocessing import LabelEncoder
import torch
import pandas as pd
import numpy as np
import pickle

In [2]:
# Load the three raw tables: base-station info (BSC), cell-level data (CLD)
# and energy-consumption data (ECD), in that order.
BSC, CLD, ECD = (
    pd.read_csv(csv_path)
    for csv_path in (
        './original_data/Base_station_basic_info.csv',
        './original_data/Cell-level_data.csv',
        './original_data/Energy_consumption_data.csv',
    )
)

In [3]:
# Attach the base-station info to every cell-level record (inner join on
# station and cell name).
merged_df = pd.merge(CLD, BSC, how='inner', on=['BS', 'CellName'])

# Add the energy data (the output) by station and timestamp; the left join
# keeps cell records that have no energy row.
fullmerged_df = pd.merge(merged_df, ECD, how='left', on=['BS', 'Time'])

In [4]:
# Number of distinct cells observed for each base station.
unique_cells_per_base = fullmerged_df.groupby('BS').CellName.nunique()


def _bases_with_cell_count(cell_count):
    """Return the BS labels whose number of distinct cells equals `cell_count`."""
    has_count = unique_cells_per_base.eq(cell_count)
    return unique_cells_per_base.loc[has_count].index


one_cell_bases = _bases_with_cell_count(1)
two_cell_bases = _bases_with_cell_count(2)
four_cell_bases = _bases_with_cell_count(4)

In [5]:
# Split the merged records into one frame per station size (1, 2 or 4 cells).
station_ids = fullmerged_df['BS']
fullmerged_df_1 = fullmerged_df.loc[station_ids.isin(one_cell_bases)]
fullmerged_df_2 = fullmerged_df.loc[station_ids.isin(two_cell_bases)]
fullmerged_df_4 = fullmerged_df.loc[station_ids.isin(four_cell_bases)]


In [6]:
# Reshape the four-cell stations to one row per (Time, BS): each per-cell
# feature becomes its own column, and missing entries are filled with 999.
per_cell_features = ['load', 'ESMode1', 'ESMode2', 'ESMode3',
                     'ESMode4', 'ESMode5', 'ESMode6',
                     'Frequency', 'Bandwidth', 'TXpower']
joined_df_4 = fullmerged_df_4.pivot_table(
    index=['Time', 'BS'],
    columns='CellName',
    values=per_cell_features,
    aggfunc='sum',
    fill_value=999,
)

# Flatten the (feature, cell) column pairs into "<cell>_<feature>" names and
# order the columns alphabetically before restoring Time/BS as columns.
joined_df_4.columns = ['{}_{}'.format(cell, feature) for feature, cell in joined_df_4.columns]
joined_df_4 = joined_df_4.sort_index(axis=1).reset_index()

# Append the station-level columns and the energy target. The right-hand frame
# presumably holds one row per cell, so matching rows repeat after the merge
# and are collapsed again by drop_duplicates().
station_level_columns = ['Time', 'BS', 'RUType', 'Mode', 'Antennas', 'Energy']
joined_df_4 = joined_df_4.merge(
    fullmerged_df_4[station_level_columns],
    on=['Time', 'BS'],
    how='left',
).drop_duplicates()

In [7]:
# Parse the timestamp strings, then derive the calendar features from them.
joined_df_4['Time'] = pd.to_datetime(joined_df_4['Time'], format='%m/%d/%Y %H:%M')
for feature_name, dt_attribute in (('Year', 'year'), ('Day', 'day'),
                                   ('Hour', 'hour'), ('Month', 'month')):
    joined_df_4[feature_name] = getattr(joined_df_4['Time'].dt, dt_attribute)

# Order rows chronologically within each station. reset_index() is called
# without drop=True on purpose: it keeps the previous index as an 'index'
# column, which a later cell drops explicitly.
joined_df_4 = joined_df_4.sort_values(by=['BS', 'Time']).reset_index()
# joined_df_4.to_csv('./joined_df_4.csv', index=False)

In [8]:
# Lagged-energy feature: Energy_1 is the Energy of the previous row of the same
# base station. Rows are expected to be ordered by (BS, Time) at this point.
#   * the first row of every run of identical BS values gets 0.0 (no history),
#   * where the previous Energy is missing, the last filled Energy_1 is carried
#     forward.
# This replaces a row-by-row loop that relied on `index - 1` label arithmetic
# and hid failures behind a bare `except:`.
starts_new_bs = joined_df_4['BS'].ne(joined_df_4['BS'].shift())
joined_df_4['Energy_1'] = (
    joined_df_4['Energy']
    .shift(1, fill_value=0)
    .mask(starts_new_bs, 0.0)  # never leak the previous station's energy
    .ffill()                   # run starts are 0.0, so the fill cannot cross stations
)

# Lookup table (BS, Time) -> [Energy_1]; the value is a one-element list.
past_energy_cell4_dict = {
    (bs, time): [past_energy]
    for bs, time, past_energy in zip(
        joined_df_4['BS'],
        joined_df_4['Time'],
        joined_df_4['Energy_1'].to_numpy(),
    )
}

# with open('./pickle/past_energy_cell4_dict.pkl', 'wb') as file:
#     pickle.dump(past_energy_cell4_dict, file)
# joined_df_4.to_csv('./a.csv', index=False)

In [9]:
# Build a lookup (BS, Hour) -> tuple of 20 values: the ten Cell2 features
# followed by the ten Cell3 features, each in the order
# Bandwidth, ESMode1..ESMode6, Frequency, TXpower, load.
# Only rows where Cell2 is present (Cell2_Bandwidth != 999, the pivot fill
# value) contribute; a later row overwrites an earlier one with the same key.
#
# Columns are selected by name rather than by the positional slice row[23:43],
# which only pointed at these columns for one specific column layout.
cell_feature_names = ['Bandwidth', 'ESMode1', 'ESMode2', 'ESMode3', 'ESMode4',
                      'ESMode5', 'ESMode6', 'Frequency', 'TXpower', 'load']
second_third_columns = [
    'Cell{}_{}'.format(cell_number, feature)
    for cell_number in (2, 3)
    for feature in cell_feature_names
]

sec_third_cell_dict = {}
for row in joined_df_4[['BS', 'Hour'] + second_third_columns].itertuples(index=False):
    if row.Cell2_Bandwidth != 999:
        # row[0] is BS and row[1] is Hour; the rest are the 20 feature values.
        sec_third_cell_dict[(row.BS, row.Hour)] = tuple(row[2:])

# with open('./pickle/sec_third_cell_dict.pkl', 'wb') as file:
    # pickle.dump(sec_third_cell_dict, file)

In [10]:
# Rows whose Cell2 is missing (Cell2_Bandwidth equals the 999 fill value) take
# all Cell2 and Cell3 values from the entry stored in sec_third_cell_dict for
# the same (BS, Hour). A missing key raises KeyError.
# The column order below must match the order of the stored values.
filled_columns = (
    'Cell2_Bandwidth', 'Cell2_ESMode1', 'Cell2_ESMode2', 'Cell2_ESMode3', 'Cell2_ESMode4',
    'Cell2_ESMode5', 'Cell2_ESMode6', 'Cell2_Frequency', 'Cell2_TXpower', 'Cell2_load',
    'Cell3_Bandwidth', 'Cell3_ESMode1', 'Cell3_ESMode2', 'Cell3_ESMode3', 'Cell3_ESMode4',
    'Cell3_ESMode5', 'Cell3_ESMode6', 'Cell3_Frequency', 'Cell3_TXpower', 'Cell3_load',
)
for row_label, record in joined_df_4.iterrows():
    if record.Cell2_Bandwidth != 999:
        continue
    stored_values = sec_third_cell_dict[(record.BS, record.Hour)]
    for position, column_name in enumerate(filled_columns):
        joined_df_4.at[row_label, column_name] = stored_values[position]

# joined_df_4.to_csv('./a.csv', index=False)

In [11]:
# Convert string-valued columns (e.g. RUType) to integer codes. The fitted
# encoder for each column is kept in label_encoders.
object_cols = ['RUType', 'Mode']
label_encoders = {column_name: LabelEncoder() for column_name in object_cols}

for column_name, encoder in label_encoders.items():
    joined_df_4[column_name] = encoder.fit_transform(joined_df_4[column_name])

# Show how many rows fall into each encoded RUType class.
joined_df_4['RUType'].value_counts()

RUType
0    280
Name: count, dtype: int64

In [12]:
# This (the one-hot encoding of RUType below) makes no sense here, because all stations are of one type
# Encoded_rutype = torch.nn.functional.one_hot(torch.tensor(joined_df_4['RUType'].values).long(), len(joined_df_4['RUType'].value_counts()))
# Encoded_rutype = pd.DataFrame(Encoded_rutype, columns=[f"RUType{i}" for i in range(len(joined_df_4['RUType'].value_counts()))])
# Encoded_rutype.value_counts()

In [13]:
# joined_df_4_numpy = joined_df_4.to_numpy()
# Encoded_rutype_numpy = Encoded_rutype.to_numpy()
# chuj = joined_df_4.columns
# joined_df_4 = np.concatenate((joined_df_4_numpy,Encoded_rutype_numpy), axis=1)
# joined_df_4 = pd.DataFrame(joined_df_4, columns = chuj.append(Encoded_rutype.columns))
# joined_df_4 = pd.concat([joined_df_4, Encoded_rutype], axis=1, ignore_index=)

# joined_df_4 = joined_df_4.drop(["BS", "Year", "Month", "RUType", "Time", "index"], axis=1)
# Remove the Year, Month and RUType columns together with the leftover 'index'
# column; BS and Time stay in the frame.
joined_df_4 = joined_df_4.drop(columns=["Year", "Month", "RUType", "index"])

# joined_df_4 = joined_df_4.drop(["BS", "Time"], axis=1)
# joined_df_4 = joined_df_4.dropna(subset=['Energy'])

# Move the target column 'Energy' to the last position, keeping the order of
# all other columns.
cols = list(filter(lambda column_name: column_name != 'Energy', joined_df_4.columns))
cols.append('Energy')
joined_df_4 = joined_df_4[cols]

In [14]:
# joined_df_4.to_csv('./prepared_data/Four_Cell_merged.csv', index=False)
# Print the final column layout of the prepared frame (the CSV export above is
# currently disabled).
print(joined_df_4.columns)

Index(['Cell0_Bandwidth', 'Cell0_ESMode1', 'Cell0_ESMode2', 'Cell0_ESMode3',
       'Cell0_ESMode4', 'Cell0_ESMode5', 'Cell0_ESMode6', 'Cell0_Frequency',
       'Cell0_TXpower', 'Cell0_load', 'Cell1_Bandwidth', 'Cell1_ESMode1',
       'Cell1_ESMode2', 'Cell1_ESMode3', 'Cell1_ESMode4', 'Cell1_ESMode5',
       'Cell1_ESMode6', 'Cell1_Frequency', 'Cell1_TXpower', 'Cell1_load',
       'Cell2_Bandwidth', 'Cell2_ESMode1', 'Cell2_ESMode2', 'Cell2_ESMode3',
       'Cell2_ESMode4', 'Cell2_ESMode5', 'Cell2_ESMode6', 'Cell2_Frequency',
       'Cell2_TXpower', 'Cell2_load', 'Cell3_Bandwidth', 'Cell3_ESMode1',
       'Cell3_ESMode2', 'Cell3_ESMode3', 'Cell3_ESMode4', 'Cell3_ESMode5',
       'Cell3_ESMode6', 'Cell3_Frequency', 'Cell3_TXpower', 'Cell3_load',
       'Mode', 'Antennas', 'Day', 'Hour', 'Energy_1', 'Energy'],
      dtype='object')
