In [2]:
import pandas as pd
import plotly.graph_objs as go
import numpy as np

In [3]:
df = pd.read_excel('dataset.xlsx')
df['time'] = pd.to_datetime(df['time'])
df

Unnamed: 0,time,paksha,tithi,temperature,relativeHumidityPercentage,dewpoint,windspeed,soil_moisture,hinduMonthName,vikramSamvat
0,2018-01-01 00:00:00,Shukla Paksha,Chaturdashi,7.3,90,5.8,11.2,0.214,Pausha,2074
1,2018-01-01 01:00:00,Shukla Paksha,Chaturdashi,7.1,90,5.5,13.3,0.214,Pausha,2074
2,2018-01-01 02:00:00,Shukla Paksha,Chaturdashi,6.8,90,5.2,13.9,0.214,Pausha,2074
3,2018-01-01 03:00:00,Shukla Paksha,Chaturdashi,8.9,83,6.3,12.5,0.214,Pausha,2074
4,2018-01-01 04:00:00,Shukla Paksha,Chaturdashi,13.8,62,6.7,7.7,0.214,Pausha,2074
...,...,...,...,...,...,...,...,...,...,...
32923,2021-10-03 20:00:00,Krishna Paksha,Dwadashi,26.6,78,22.5,19.5,0.349,Ashwina,2078
32924,2021-10-03 21:00:00,Krishna Paksha,Dwadashi,26.4,77,22.1,20.2,0.349,Ashwina,2078
32925,2021-10-03 22:00:00,Krishna Paksha,Dwadashi,26.2,77,21.8,19.1,0.349,Ashwina,2078
32926,2021-10-03 23:00:00,Krishna Paksha,Trayodashi,25.9,77,21.6,17.6,0.349,Ashwina,2078


In [4]:
# Resampling data from hourly to daily
# set 'time' column as the index
df = df.set_index('time')

# resample hourly data to daily
df_daily = df.resample('D').agg({'temperature': 'mean','relativeHumidityPercentage': 'mean', 'dewpoint': 'mean','windspeed': 'mean', 'soil_moisture': 'mean', 'tithi': 'first', 'hinduMonthName': 'first', 'vikramSamvat': 'first'})

# reset the index to get 'time' as a column again
df_daily = df_daily.reset_index()

df_daily
df.reset_index()


Unnamed: 0,time,paksha,tithi,temperature,relativeHumidityPercentage,dewpoint,windspeed,soil_moisture,hinduMonthName,vikramSamvat
0,2018-01-01 00:00:00,Shukla Paksha,Chaturdashi,7.3,90,5.8,11.2,0.214,Pausha,2074
1,2018-01-01 01:00:00,Shukla Paksha,Chaturdashi,7.1,90,5.5,13.3,0.214,Pausha,2074
2,2018-01-01 02:00:00,Shukla Paksha,Chaturdashi,6.8,90,5.2,13.9,0.214,Pausha,2074
3,2018-01-01 03:00:00,Shukla Paksha,Chaturdashi,8.9,83,6.3,12.5,0.214,Pausha,2074
4,2018-01-01 04:00:00,Shukla Paksha,Chaturdashi,13.8,62,6.7,7.7,0.214,Pausha,2074
...,...,...,...,...,...,...,...,...,...,...
32923,2021-10-03 20:00:00,Krishna Paksha,Dwadashi,26.6,78,22.5,19.5,0.349,Ashwina,2078
32924,2021-10-03 21:00:00,Krishna Paksha,Dwadashi,26.4,77,22.1,20.2,0.349,Ashwina,2078
32925,2021-10-03 22:00:00,Krishna Paksha,Dwadashi,26.2,77,21.8,19.1,0.349,Ashwina,2078
32926,2021-10-03 23:00:00,Krishna Paksha,Trayodashi,25.9,77,21.6,17.6,0.349,Ashwina,2078


In [12]:
# Yearly Temperature comparison for each month according to english calendar
def get_month_name(month_num):
    month_names = ['', 'January', 'February', 'March', 'April', 'May', 'June', 
                   'July', 'August', 'September', 'October', 'November', 'December']
    return month_names[month_num]

# years array
years = [2018, 2019, 2020]

# months array
months = range(1, 13)

# loop through each month and create a separate figure
for month in months:
    data = []
    layout = dict(title=f"Temperature for month {get_month_name(month)}")
    std_dev_arr = []
    final_std_dev = []

    for year in years:
        # filter data for the given year and month
        df_filtered = df_daily[(df_daily['time'].dt.year == year) & (df_daily['time'].dt.month == month)]
        
        std_dev = np.std(df_filtered['temperature'])
        # print(f"standard deviation for {month} {year} is {std_dev}")
        std_dev_arr.append(std_dev)

        # create a trace for the year
        trace = go.Scatter(
            x=df_filtered['time'].dt.day,
            y=df_filtered['temperature'],
            name=str(year)
        )
        
        print(year, std_dev_arr)
        data.append(trace)
    
    final_std_dev.append(np.average(std_dev_arr))
    fig = go.Figure(data=data, layout=layout)
    fig.show()

print(f'Average Monthly Standard Deviation in English Calendar is {np.average(final_std_dev)}')


2018 [1.350854503505252]
2019 [1.350854503505252, 1.5561346525744488]
2020 [1.350854503505252, 1.5561346525744488, 1.236354047391214]


2018 [2.5203156048642334]
2019 [2.5203156048642334, 1.7546749540307232]
2020 [2.5203156048642334, 1.7546749540307232, 2.7332130274148567]


2018 [1.8833957595883875]
2019 [1.8833957595883875, 3.505642208443092]
2020 [1.8833957595883875, 3.505642208443092, 2.4171561619278545]


2018 [1.7961458866963715]
2019 [1.7961458866963715, 3.1006340326328403]
2020 [1.7961458866963715, 3.1006340326328403, 2.1152113143421363]


2018 [1.7835668008262537]
2019 [1.7835668008262537, 1.8507250921123246]
2020 [1.7835668008262537, 1.8507250921123246, 2.3402518321120445]


2018 [2.080147420471699]
2019 [2.080147420471699, 1.864504622795536]
2020 [2.080147420471699, 1.864504622795536, 2.127832285528985]


2018 [1.8035420837526375]
2019 [1.8035420837526375, 2.3355114626258824]
2020 [1.8035420837526375, 2.3355114626258824, 1.7960632195990194]


2018 [0.9711410956956117]
2019 [0.9711410956956117, 1.0086125033858926]
2020 [0.9711410956956117, 1.0086125033858926, 1.32727324807282]


2018 [1.1469206625450779]
2019 [1.1469206625450779, 1.7048351717872798]
2020 [1.1469206625450779, 1.7048351717872798, 1.0990747809991805]


2018 [1.4852774764286827]
2019 [1.4852774764286827, 0.826260218493294]
2020 [1.4852774764286827, 0.826260218493294, 1.7245723508535986]


2018 [1.433941004907648]
2019 [1.433941004907648, 1.8362535087243452]
2020 [1.433941004907648, 1.8362535087243452, 2.257723664630668]


2018 [1.9930622022011724]
2019 [1.9930622022011724, 3.0494288876656204]
2020 [1.9930622022011724, 3.0494288876656204, 3.139900796719304]


Average Monthly Standard Deviation in English Calendar is 2.727463962195366


In [6]:
# Grouping hindu months together
hindu_month_grouped = df_daily.groupby(['vikramSamvat', 'hinduMonthName']).agg({'temperature': 'mean','relativeHumidityPercentage': 'mean', 'dewpoint': 'mean','windspeed': 'mean', 'soil_moisture': 'mean', 'tithi': 'first', 'hinduMonthName': 'first', 'vikramSamvat': 'first'})
hindu_month_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,temperature,relativeHumidityPercentage,dewpoint,windspeed,soil_moisture,tithi,hinduMonthName,vikramSamvat
vikramSamvat,hinduMonthName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2074,Magha,14.201437,68.306034,7.773851,17.169109,0.207441,Pratipada,Magha,2074
2074,Pausha,13.147917,72.3125,7.564583,8.94375,0.213646,Chaturdashi,Pausha,2074
2074,Phalguna,18.242222,61.7,10.035694,16.712083,0.195665,Pratipada,Phalguna,2074
2075,Adhik Jyeshtha,33.814028,38.956944,16.010972,17.7775,0.166796,Pratipada,Adhik Jyeshtha,2075
2075,Agrahayana,16.33319,66.423851,9.260632,12.248851,0.236864,Pratipada,Agrahayana,2075
2075,Ashadha,29.882083,76.954167,25.067361,13.050972,0.178196,Pratipada,Ashadha,2075
2075,Ashwina,25.335489,66.678161,17.913793,11.356753,0.323986,Pratipada,Ashwina,2075
2075,Bhadra,27.327361,81.336111,23.623472,14.20625,0.333201,Pratipada,Bhadra,2075
2075,Chaitra,24.499337,46.208333,10.854356,17.020265,0.187045,Pratipada,Chaitra,2075
2075,Jyeshtha,32.148276,47.262931,18.360345,14.835057,0.16745,Pratipada,Jyeshtha,2075


In [7]:
# Making a df containing paksha and tithi combined
combinedDF = df
combinedDF['paksha-tithi'] = combinedDF.apply(lambda row: row['paksha'] + '-' + row['tithi'], axis=1)
# combinedDF.drop(['paksha', 'tithi'], axis=1, inplace=True)

filtered = combinedDF[(combinedDF['vikramSamvat'] == 2074) & (combinedDF['hinduMonthName'] == 'Phalguna')]
filtered_grouped = filtered.groupby('paksha-tithi', sort=False).agg({'temperature': 'mean','relativeHumidityPercentage': 'mean', 'dewpoint': 'mean','windspeed': 'mean', 'soil_moisture': 'mean', 'hinduMonthName': 'first', 'vikramSamvat': 'first'})

filtered_grouped


Unnamed: 0_level_0,temperature,relativeHumidityPercentage,dewpoint,windspeed,soil_moisture,hinduMonthName,vikramSamvat
paksha-tithi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Krishna Paksha-Pratipada,18.442857,57.333333,9.37619,20.880952,0.202,Phalguna,2074
Krishna Paksha-Dwitiya,17.509524,68.952381,11.138095,16.290476,0.20181,Phalguna,2074
Krishna Paksha-Tritiya,16.745455,67.590909,9.963636,15.813636,0.201,Phalguna,2074
Krishna Paksha-Chaturthi,14.440909,58.409091,5.954545,20.709091,0.201,Phalguna,2074
Krishna Paksha-Panchami,14.825,55.083333,5.129167,16.316667,0.200042,Phalguna,2074
Krishna Paksha-Shashthi,16.508333,47.791667,5.0,16.379167,0.2,Phalguna,2074
Krishna Paksha-Saptami,14.958333,46.958333,3.3875,22.845833,0.199875,Phalguna,2074
Krishna Paksha-Ashtami,14.823077,56.0,4.873077,14.988462,0.199,Phalguna,2074
Krishna Paksha-Navami,15.257692,54.346154,5.234615,16.803846,0.198808,Phalguna,2074
Krishna Paksha-Dashami,15.984615,55.769231,6.023077,8.853846,0.198,Phalguna,2074


In [8]:
# Yearly Temperature comparison for each month according to english calendar
def get_month_name(month_num):
    hindu_month_names = [
    '', 'Chaitra', 'Vaishakha', 'Jyeshtha', 'Ashadha', 'Shravana', 'Bhadra', 
    'Ashwina', 'Kartika', 'Agrahayana', 'Pausa', 'Magha', 'Phalguna'
    ]   
    return hindu_month_names[month_num]

# years array
years = [2075, 2076, 2077]

# months array
months = range(1, 13)

# loop through each month and create a separate figure
for month in months:
    data = []
    layout = dict(title=f"Temperature for month {get_month_name(month)}")
    std_dev_arr = []
    final_std_dev = []
    
    for year in years:
        # filter data for the given year and month
        filtered = combinedDF[(combinedDF['vikramSamvat'] == year) & (combinedDF['hinduMonthName'] == get_month_name(month))]
        filtered_grouped = filtered.groupby('paksha-tithi', sort=False).agg({'temperature': 'mean','relativeHumidityPercentage': 'mean', 'dewpoint': 'mean','windspeed': 'mean', 'soil_moisture': 'mean', 'hinduMonthName': 'first', 'vikramSamvat': 'first'})

        filtered_grouped = filtered_grouped.reset_index()
        # if(month == 5) :
        #     print(year, get_month_name(month))
        #     print(filtered_grouped)

        std_dev = np.std(filtered_grouped['temperature'])
        # print(f"standard deviation for {month} {year} is {std_dev}")
        std_dev_arr.append(std_dev)

        # create a trace for the year
        trace = go.Scatter(
            x=filtered_grouped['paksha-tithi'],
            y=filtered_grouped['temperature'],
            name=str(year)
        )
        
        data.append(trace)
    
    final_std_dev.append(np.average(std_dev_arr))
    fig = go.Figure(data=data, layout=layout)
    fig.show()

print(f'Average Monthly Standard Deviation in English Calendar is {np.average(final_std_dev)}')


Average Monthly Standard Deviation in English Calendar is 1.9885051510620162
