In [2]:
import pandas as pd
import seaborn
from datetime import datetime

In [3]:
# Read the cleaned pre- and post-transmission CSV files and order each one by its 'Date' column.
postx_data = pd.read_csv('/kaggle/input/data-hf/cleaned_postx_data.csv').sort_values(by='Date')
pretx_data = pd.read_csv('/kaggle/input/data-hf/cleaned_pretx_data.csv').sort_values(by='Date')


In [4]:
# #Inputs
# '''
#power
# average DB = (Sum of all db)/No. of Entries Towers
# average DT = (Sum of all dt)/No. of Entries Towers
# average freq offset = (Sum of all freq)/No. of Entries Towers 
# tt, date -> day/night, weather (thandi = cold, garmi = hot) (0/1), '0-3, 3-6, 6-9, 9-12, 12-15, 15-18, 18-21, 21-24'
# '''

# #Outputs (Success Metrics)
# '''
# SM-1 :- No. of pos towers == No. of entries in a file (in a single transmission)
# SM-2 :- Average Distance = (Sum of all distances)/No. of pos Towers
# '''

# '''
# T1 is more successful than T2 if{
#     SM-1 of T1 > SM-1 of T2 -> more towers received our transmission
#     SM-2 of T1 > SM-2 of T2 -> larger distance covered by the transmission tower
# }
# '''

def getdaynight(time):
    """Classify an HHMM-encoded UTC time as day (1) or night (0).

    Parameters
    ----------
    time : int, float or str
        Time of day encoded as HHMM without a separator, e.g. ``1833`` for
        18:33 or ``929`` for 09:29. Leading zeros may be missing (as happens
        when the value is stored as an integer), so ``7`` means 00:07.

    Returns
    -------
    int
        1 when the hour is in the range 06-19 (inclusive), otherwise 0.
    """
    # Integer division by 100 strips the two minute digits regardless of how
    # many leading zeros were lost. Slicing the string by length instead
    # misreads values below 100 (e.g. 7 -> hour 7 rather than hour 0).
    hours = int(float(time)) // 100

    # Daytime is taken to be 06:00 (inclusive) to 20:00 (exclusive) UTC.
    return 1 if 6 <= hours < 20 else 0

def get_weather(latitude, longitude, time):
    """Return the winter flag for a location and date.

    Parameters
    ----------
    latitude : float
        Latitude in degrees; values >= 0 are treated as the Northern
        hemisphere, negative values as the Southern hemisphere.
    longitude : float
        Accepted for interface compatibility; not used in the calculation.
    time : str, int or datetime
        The date, either as a ``datetime`` or as a value whose string form
        is ``YYYYMMDD`` (e.g. ``'20210420'``).

    Returns
    -------
    int
        1 if the date falls in the winter months of the given hemisphere
        (November-February in the North, May-August in the South), else 0.

    Raises
    ------
    ValueError
        If ``time`` is not a ``datetime`` and cannot be parsed as ``YYYYMMDD``.
    """
    if not isinstance(time, datetime):
        time = datetime.strptime(str(time), '%Y%m%d')

    # Compare numeric months rather than strftime("%B") names: the names are
    # capitalised and locale-dependent, so matching them against lowercase
    # literals never succeeds and the flag would always be 0.
    northern_winter = {11, 12, 1, 2}
    southern_winter = {5, 6, 7, 8}

    winter_months = northern_winter if latitude >= 0 else southern_winter
    return 1 if time.month in winter_months else 0

def get_pos_towers(dataset):
    """Count the distinct receiving stations in ``dataset``.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame with a 'Recieving_Station' column (the spelling matches the
        column name used in the data).

    Returns
    -------
    int
        Number of distinct values in the 'Recieving_Station' column. A frame
        with no rows yields 0. Missing values, if any, count as one station.
    """
    # A frame with no rows has no stations, even if the column is absent.
    if len(dataset) == 0:
        return 0

    # Vectorised distinct count instead of a Python-level loop over iterrows().
    return int(dataset['Recieving_Station'].nunique(dropna=False))


In [5]:
# dataset = {
#     'tt':[],
#     'date':[],
#     'dt':[],
#     'db':[],
#     'freq':[],
#     'pos_towers':[]
#     'Distance':[]
# }

# All dates that appear in either the pre- or the post-transmission data.
# A column-level set union replaces the two row-by-row iterrows() scans, and
# sorting gives a reproducible processing order (set iteration order is an
# implementation detail). Assumes the 'Date' values are mutually comparable
# (e.g. all integers in YYYYMMDD form).
datelist = sorted(
    set(pretx_data['Date'].tolist()) | set(postx_data['Date'].tolist())
)

In [9]:
# Coordinates of Leeds; the latitude decides which hemisphere's winter months
# get_weather() uses. NOTE(review): presumably the transmitter site - confirm.
latitude = 53.801277
longitude = -1.548567

# Column-oriented accumulator: every (date, transmission time) pair processed
# below appends exactly one value to each list, so all lists stay equal length.
dataset = {
    'tt':[],
    'date':[],
    'dt':[],
    'db':[],
    'freq':[],
    'day_or_night':[],
    'weather':[],
    'power':[],
    'band':[],
    'pos_towers':[],
    'average_distance':[]
}

for date in datelist:

    # Entries logged on this date, filtered once and reused for every
    # transmission time instead of re-querying the full frames each time.
    df_temp = pretx_data.query(f'Date == {date}')
    df_post_temp = postx_data.query(f'Date == {date}')

    # Unique transmission times present in the pre-transmission data on this date.
    timelist = set(df_temp['Transmission_Time'].tolist())

    # The winter flag depends only on the date, so compute it once per date.
    weather = get_weather(latitude , longitude , str(date))

    for time in timelist:

        # Signals received before the transmission made on `date` at `time`.
        df_pretx = df_temp.query(f'Transmission_Time == {time}')

        # Entries that received the transmission made on `date` at `time`.
        df_postx = df_post_temp.query(f'Transmission_Time == {time}')

        # Inputs ------------------------------------------------------------------

        # day -> 1, night -> 0
        day_or_night = getdaynight(time)

        # Mean dt, db and freq over the pre-transmission entries.
        average_dt = df_pretx['dt'].mean()
        average_db = df_pretx['db'].mean()
        average_freq = df_pretx['freq'].mean()

        # Band is read from the first pre-transmission entry of this transmission.
        band = df_pretx.iloc[0]['band']

        # Mean power over the post-transmission entries (NaN when there are none).
        power = df_postx['power'].mean()

        # Outputs -----------------------------------------------------------------

        # SM-1: number of post-transmission entries. This counts rows, not
        # distinct stations; get_pos_towers(df_postx) gives the distinct count.
        pos_towers = len(df_postx)

        # SM-2: mean distance over the post-transmission entries (NaN when none).
        average_distance = df_postx['Distance'].mean()

        dataset['tt'].append(time)                      # transmission time
        dataset['date'].append(date)                    # transmission date
        dataset['dt'].append(average_dt)                # mean dt before the transmission
        dataset['db'].append(average_db)                # mean db before the transmission
        dataset['freq'].append(average_freq)            # mean freq before the transmission
        dataset['day_or_night'].append(day_or_night)
        dataset['weather'].append(weather)
        dataset['power'].append(power)
        dataset['band'].append(band)
        dataset['pos_towers'].append(pos_towers)
        dataset['average_distance'].append(average_distance)
        

In [7]:
# # (example)
# date = datelist[4]
# time = 1833
# df_temp = postx_data.query(f'Date == {date}').query(f'Transmission_Time == {time}')
# get_pos_towers(df_temp)

In [10]:
# Turn the accumulated column lists into a DataFrame ordered by date.
final_dataset = pd.DataFrame(dataset).sort_values(by='date')

# Write the result to the working directory (the index is written as the first column).
final_dataset.to_csv('cleaned_2_final_data.csv')

# Show the frame as the cell output.
final_dataset

Unnamed: 0,tt,date,dt,db,freq,day_or_night,weather,power,band,pos_towers,average_distance
187,2003,20210420,0.179741,-11.456897,1379.543103,0,0,7.0,40,1,1351.000000
185,1147,20210420,0.381513,-16.596639,1493.764706,1,0,10.0,20,13,1430.076923
184,1702,20210420,0.139944,-12.810056,1449.164804,1,0,,40,0,
188,2035,20210420,0.192593,-11.592593,1340.518519,0,0,7.0,20,1,1802.000000
189,1435,20210420,0.076531,-13.918367,1595.153061,1,0,,20,0,
...,...,...,...,...,...,...,...,...,...,...,...
462,929,20240305,0.058273,-13.258993,1633.971223,1,0,9.0,20,5,2633.200000
466,1404,20240307,0.347674,-12.523256,1430.720930,1,0,10.0,40,38,488.473684
467,1351,20240307,0.242400,-12.784000,1462.256000,1,0,10.0,20,53,1096.000000
474,1732,20240315,0.149640,-13.611511,1323.546763,1,0,6.0,40,21,497.380952
