In [1]:
import pandas as pd
import plotly.graph_objs as go
import numpy as np

In [2]:
df = pd.read_excel('../dataset.xlsx')
df['time'] = pd.to_datetime(df['time'])
df

Unnamed: 0,time,paksha,tithi,temperature,relativeHumidityPercentage,dewpoint,windspeed,soil_moisture,hinduMonthName,vikramSamvat
0,2018-01-01 00:00:00,Shukla Paksha,Chaturdashi,7.3,90,5.8,11.2,0.214,Pausha,2074
1,2018-01-01 01:00:00,Shukla Paksha,Chaturdashi,7.1,90,5.5,13.3,0.214,Pausha,2074
2,2018-01-01 02:00:00,Shukla Paksha,Chaturdashi,6.8,90,5.2,13.9,0.214,Pausha,2074
3,2018-01-01 03:00:00,Shukla Paksha,Chaturdashi,8.9,83,6.3,12.5,0.214,Pausha,2074
4,2018-01-01 04:00:00,Shukla Paksha,Chaturdashi,13.8,62,6.7,7.7,0.214,Pausha,2074
...,...,...,...,...,...,...,...,...,...,...
32923,2021-10-03 20:00:00,Krishna Paksha,Dwadashi,26.6,78,22.5,19.5,0.349,Ashwina,2078
32924,2021-10-03 21:00:00,Krishna Paksha,Dwadashi,26.4,77,22.1,20.2,0.349,Ashwina,2078
32925,2021-10-03 22:00:00,Krishna Paksha,Dwadashi,26.2,77,21.8,19.1,0.349,Ashwina,2078
32926,2021-10-03 23:00:00,Krishna Paksha,Trayodashi,25.9,77,21.6,17.6,0.349,Ashwina,2078


In [3]:
# Resampling data from hourly to daily
# set 'time' column as the index
df = df.set_index('time')

# resample hourly data to daily
df_daily = df.resample('D').agg({'temperature': 'mean','relativeHumidityPercentage': 'mean', 'dewpoint': 'mean','windspeed': 'mean', 'soil_moisture': 'mean', 'tithi': 'first', 'hinduMonthName': 'first', 'vikramSamvat': 'first'})

# reset the index to get 'time' as a column again
df_daily = df_daily.reset_index()

df_daily
df.reset_index()


Unnamed: 0,time,paksha,tithi,temperature,relativeHumidityPercentage,dewpoint,windspeed,soil_moisture,hinduMonthName,vikramSamvat
0,2018-01-01 00:00:00,Shukla Paksha,Chaturdashi,7.3,90,5.8,11.2,0.214,Pausha,2074
1,2018-01-01 01:00:00,Shukla Paksha,Chaturdashi,7.1,90,5.5,13.3,0.214,Pausha,2074
2,2018-01-01 02:00:00,Shukla Paksha,Chaturdashi,6.8,90,5.2,13.9,0.214,Pausha,2074
3,2018-01-01 03:00:00,Shukla Paksha,Chaturdashi,8.9,83,6.3,12.5,0.214,Pausha,2074
4,2018-01-01 04:00:00,Shukla Paksha,Chaturdashi,13.8,62,6.7,7.7,0.214,Pausha,2074
...,...,...,...,...,...,...,...,...,...,...
32923,2021-10-03 20:00:00,Krishna Paksha,Dwadashi,26.6,78,22.5,19.5,0.349,Ashwina,2078
32924,2021-10-03 21:00:00,Krishna Paksha,Dwadashi,26.4,77,22.1,20.2,0.349,Ashwina,2078
32925,2021-10-03 22:00:00,Krishna Paksha,Dwadashi,26.2,77,21.8,19.1,0.349,Ashwina,2078
32926,2021-10-03 23:00:00,Krishna Paksha,Trayodashi,25.9,77,21.6,17.6,0.349,Ashwina,2078


In [4]:
# Yearly Temperature comparison for each month according to english calendar
def get_month_name(month_num):
    month_names = ['', 'January', 'February', 'March', 'April', 'May', 'June', 
                   'July', 'August', 'September', 'October', 'November', 'December']
    return month_names[month_num]

# years array
years = [2018, 2019, 2020]

# months array
months = range(1, 13)

# loop through each month and create a separate figure
for month in months:
    data = []
    layout = dict(title=f"Dewpoint for month {get_month_name(month)}")
    std_dev_arr_english = []
    final_std_dev_english = []

    for year in years:
        # filter data for the given year and month
        df_filtered = df_daily[(df_daily['time'].dt.year == year) & (df_daily['time'].dt.month == month)]
        
        std_dev = np.std(df_filtered['dewpoint'])
        # print(f"standard deviation for {month} {year} is {std_dev}")
        std_dev_arr_english.append(std_dev)

        # create a trace for the year
        trace = go.Scatter(
            x=df_filtered['time'].dt.day,
            y=df_filtered['dewpoint'],
            name=str(year)
        )
        
        data.append(trace)
    
    final_std_dev_english.append(np.average(std_dev_arr_english))
    fig = go.Figure(data=data, layout=layout)
    fig.show()

print(f'Average Monthly Standard Deviation in English Calendar is {np.average(final_std_dev_english)}')


Average Monthly Standard Deviation in English Calendar is 2.6970079739711803


In [5]:
# Grouping hindu months together
hindu_month_grouped = df_daily.groupby(['vikramSamvat', 'hinduMonthName']).agg({'temperature': 'mean','relativeHumidityPercentage': 'mean', 'dewpoint': 'mean','windspeed': 'mean', 'soil_moisture': 'mean', 'tithi': 'first', 'hinduMonthName': 'first', 'vikramSamvat': 'first'})
hindu_month_grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,temperature,relativeHumidityPercentage,dewpoint,windspeed,soil_moisture,tithi,hinduMonthName,vikramSamvat
vikramSamvat,hinduMonthName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2074,Magha,14.201437,68.306034,7.773851,17.169109,0.207441,Pratipada,Magha,2074
2074,Pausha,13.147917,72.3125,7.564583,8.94375,0.213646,Chaturdashi,Pausha,2074
2074,Phalguna,18.242222,61.7,10.035694,16.712083,0.195665,Pratipada,Phalguna,2074
2075,Adhik Jyeshtha,33.814028,38.956944,16.010972,17.7775,0.166796,Pratipada,Adhik Jyeshtha,2075
2075,Agrahayana,16.33319,66.423851,9.260632,12.248851,0.236864,Pratipada,Agrahayana,2075
2075,Ashadha,29.882083,76.954167,25.067361,13.050972,0.178196,Pratipada,Ashadha,2075
2075,Ashwina,25.335489,66.678161,17.913793,11.356753,0.323986,Pratipada,Ashwina,2075
2075,Bhadra,27.327361,81.336111,23.623472,14.20625,0.333201,Pratipada,Bhadra,2075
2075,Chaitra,24.499337,46.208333,10.854356,17.020265,0.187045,Pratipada,Chaitra,2075
2075,Jyeshtha,32.148276,47.262931,18.360345,14.835057,0.16745,Pratipada,Jyeshtha,2075


In [6]:
# Making a df containing paksha and tithi combined
combinedDF = df
combinedDF['paksha-tithi'] = combinedDF.apply(lambda row: row['paksha'] + '-' + row['tithi'], axis=1)
# combinedDF.drop(['paksha', 'tithi'], axis=1, inplace=True)

filtered = combinedDF[(combinedDF['vikramSamvat'] == 2074) & (combinedDF['hinduMonthName'] == 'Phalguna')]
filtered_grouped = filtered.groupby('paksha-tithi', sort=False).agg({'temperature': 'mean','relativeHumidityPercentage': 'mean', 'dewpoint': 'mean','windspeed': 'mean', 'soil_moisture': 'mean', 'hinduMonthName': 'first', 'vikramSamvat': 'first'})

filtered_grouped


Unnamed: 0_level_0,temperature,relativeHumidityPercentage,dewpoint,windspeed,soil_moisture,hinduMonthName,vikramSamvat
paksha-tithi,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Krishna Paksha-Pratipada,18.442857,57.333333,9.37619,20.880952,0.202,Phalguna,2074
Krishna Paksha-Dwitiya,17.509524,68.952381,11.138095,16.290476,0.20181,Phalguna,2074
Krishna Paksha-Tritiya,16.745455,67.590909,9.963636,15.813636,0.201,Phalguna,2074
Krishna Paksha-Chaturthi,14.440909,58.409091,5.954545,20.709091,0.201,Phalguna,2074
Krishna Paksha-Panchami,14.825,55.083333,5.129167,16.316667,0.200042,Phalguna,2074
Krishna Paksha-Shashthi,16.508333,47.791667,5.0,16.379167,0.2,Phalguna,2074
Krishna Paksha-Saptami,14.958333,46.958333,3.3875,22.845833,0.199875,Phalguna,2074
Krishna Paksha-Ashtami,14.823077,56.0,4.873077,14.988462,0.199,Phalguna,2074
Krishna Paksha-Navami,15.257692,54.346154,5.234615,16.803846,0.198808,Phalguna,2074
Krishna Paksha-Dashami,15.984615,55.769231,6.023077,8.853846,0.198,Phalguna,2074


In [7]:
# Yearly Temperature comparison for each month according to Hindu calendar
def get_month_name(month_num):
    hindu_month_names = [
    '', 'Chaitra', 'Vaishakha', 'Jyeshtha', 'Ashadha', 'Shravana', 'Bhadra', 
    'Ashwina', 'Kartika', 'Agrahayana', 'Pausa', 'Magha', 'Phalguna'
    ]   
    return hindu_month_names[month_num]

# years array
years = [2075, 2076, 2077]

# months array
months = range(1, 13)

# loop through each month and create a separate figure
for month in months:
    data = []
    layout = dict(title=f"Dewpoint for month {get_month_name(month)}")
    std_dev_arr_hindu = []
    final_std_dev_hindu = []
    
    for year in years:
        # filter data for the given year and month
        filtered = combinedDF[(combinedDF['vikramSamvat'] == year) & (combinedDF['hinduMonthName'] == get_month_name(month))]
        filtered_grouped = filtered.groupby('paksha-tithi', sort=False).agg({'temperature': 'mean','relativeHumidityPercentage': 'mean', 'dewpoint': 'mean','windspeed': 'mean', 'soil_moisture': 'mean', 'hinduMonthName': 'first', 'vikramSamvat': 'first'})

        filtered_grouped = filtered_grouped.reset_index()
        # if(month == 5) :
        #     print(year, get_month_name(month))
        #     print(filtered_grouped)

        std_dev = np.std(filtered_grouped['dewpoint'])
        # print(f"standard deviation for {month} {year} is {std_dev}")
        std_dev_arr_hindu.append(std_dev)

        # create a trace for the year
        trace = go.Scatter(
            x=filtered_grouped['paksha-tithi'],
            y=filtered_grouped['dewpoint'],
            name=str(year)
        )
        
        data.append(trace)
    
    final_std_dev_hindu.append(np.average(std_dev_arr_hindu))
    fig = go.Figure(data=data, layout=layout)
    fig.show()

print(f'Average Monthly Standard Deviation in Hindu Calendar is {np.average(final_std_dev_hindu)}')


Average Monthly Standard Deviation in Hindu Calendar is 2.690036589646855


In [8]:
data = [
    {'x': ['Hindu Calendar', 'English Calendar'], 'y': [final_std_dev_hindu[0], final_std_dev_english[0]], 'type': 'bar'}
]

layout = {
    'title': {'text': 'Standard Deviation Comparison For Dewpoint'},
    'xaxis': {'title': {'text': 'Calendar Type'}},
    'yaxis': {'title': {'text': 'Standard Deviation'}}
}

fig = go.Figure(data=data, layout=layout)
fig.show()

In [9]:
# Define the x-axis categories
years = ["2018", "2019", "2020"]

# Create the figure object
fig = go.Figure()

# Add the data for the Hindu calendar
fig.add_trace(go.Bar(x=years, y=std_dev_arr_hindu, name='Hindu Calendar'))

# Add the data for the English calendar
fig.add_trace(go.Bar(x=years, y=std_dev_arr_english, name='English Calendar'))

# Set the layout of the chart
fig.update_layout(title='Yearly Standard Deviation Comparison of Dewpoint in Hindu and English Calendar',
                  xaxis_title='Month',
                  yaxis_title='Standard Deviation')

# Show the chart
fig.show()


In [10]:
# Monthly Std Dev for each month of every english year

def get_month_name(month_num):
    month_names = ['', 'January', 'February', 'March', 'April', 'May', 'June', 
                   'July', 'August', 'September', 'October', 'November', 'December']
    return month_names[month_num]

# years array
years = [2018, 2019, 2020]

# months array
months = range(1, 13)

months_deviation_english = []
for year in years:
    arr = []
    for month in months:
        df_filtered = df_daily[(df_daily['time'].dt.year == year) & (df_daily['time'].dt.month == month)]
        arr.append(np.std(df_filtered['dewpoint']))
    months_deviation_english.append({year: arr})

months_deviation_english

[{2018: [1.716051888745006,
   3.329261636811312,
   2.0128819551019785,
   4.610535154792798,
   4.38050316639632,
   2.2061993707687084,
   0.8820896486328517,
   1.048644733300742,
   1.233887989956649,
   3.257688588402871,
   2.6703714174145228,
   2.1154488294107883]},
 {2019: [2.7891462185607923,
   2.4959526611324643,
   2.235242290938998,
   3.536453922568569,
   4.434973724328462,
   3.153065298318917,
   1.5044836542878064,
   0.6228033429029508,
   1.3690041550496512,
   2.541241534587792,
   1.9382233267484557,
   2.080761819129652]},
 {2020: [1.9463813493192141,
   3.2393150181116397,
   1.6578968782492305,
   2.639473213815707,
   4.603479974076055,
   1.463646564383663,
   0.683126242910514,
   0.8503349073043779,
   2.532761284858928,
   3.1093772337878076,
   2.749381345254929,
   3.8948132733731007]}]

In [11]:
# Monthly Std Dev for each month of every hindu year

def get_month_name(month_num):
    hindu_month_names = [
    '', 'Chaitra', 'Vaishakha', 'Jyeshtha', 'Ashadha', 'Shravana', 'Bhadra', 
    'Ashwina', 'Kartika', 'Agrahayana', 'Pausa', 'Magha', 'Phalguna'
    ]   
    return hindu_month_names[month_num]

# years array
years = [2075, 2076, 2077]

# months array
months = range(1, 13)

months_deviation_hindu = []
# loop through each month and create a separate figure
for year in years:
    arr = []
    for month in months:
        # filter data for the given year and month
        filtered = combinedDF[(combinedDF['vikramSamvat'] == year) & (combinedDF['hinduMonthName'] == get_month_name(month))]
        filtered_grouped = filtered.groupby('paksha-tithi', sort=False).agg({'temperature': 'mean','relativeHumidityPercentage': 'mean', 'dewpoint': 'mean','windspeed': 'mean', 'soil_moisture': 'mean', 'hinduMonthName': 'first', 'vikramSamvat': 'first'})

        filtered_grouped = filtered_grouped.reset_index()
        arr.append(np.std(filtered_grouped['dewpoint']))
    months_deviation_hindu.append({year: arr})

months_deviation_hindu
        


[{2075: [1.7057483224271341,
   4.5673543051798955,
   3.125132288501038,
   0.8856824895177716,
   1.0422150097254457,
   1.3770590384157477,
   3.7750251453114907,
   2.6250418331292575,
   2.271789267437227,
   2.2369447213079567,
   3.381196664887069,
   2.0401092205782962]},
 {2076: [1.982291116193864,
   4.564960167827806,
   4.5471977019195,
   2.4360687083921353,
   0.6753726505306505,
   0.577629785135764,
   2.1773845303680615,
   2.60935085121641,
   2.115350044571914,
   2.071798558278291,
   1.7589843612440552,
   3.193972769838442]},
 {2077: [5.809824820898875,
   2.943293029796727,
   4.899898198870865,
   1.3939827458444676,
   0.6350625010731338,
   0.9377282564277415,
   6.961834508381825,
   2.858333458865134,
   3.9457585391130072,
   2.9368307723549414,
   2.0013678540582562,
   2.8360277785238255]}]

In [12]:
import pandas as pd
import plotly.express as px

# Concatenate the mean deviation arrays for each month of both calendars
months_deviation = []
for i in range(12):
    hindu_month_deviation = []
    english_month_deviation = []
    for year_dict in months_deviation_hindu:
        hindu_month_deviation.append(year_dict[list(year_dict.keys())[0]][i])
    for year_dict in months_deviation_english:
        english_month_deviation.append(year_dict[list(year_dict.keys())[0]][i])
    months_deviation.append(hindu_month_deviation + english_month_deviation)

# Create a dataframe with columns for the month, year, and mean deviation
df = pd.DataFrame({'Month': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']*6,
                   'Year': [2018]*12 + [2019]*12 + [2020]*12 + [2075]*12 + [2076]*12 + [2077]*12,
                   'Mean Deviation': months_deviation[0] + months_deviation[1] + months_deviation[2] +
                                     months_deviation[3] + months_deviation[4] + months_deviation[5] +
                                     months_deviation[6] + months_deviation[7] + months_deviation[8] +
                                     months_deviation[9] + months_deviation[10] + months_deviation[11],
                   'Calendar': ['Hindu']*36 + ['English']*36})

# Use Plotly's px.bar() function to create a side-by-side bar chart
fig = px.bar(df, x='Month', y='Mean Deviation', color='Calendar', barmode='group', facet_col='Year',
             labels={'Month': 'Month', 'Mean Deviation': 'Mean Deviation of Dewpoint'})
fig.update_layout(title='Mean Deviation of Dewpoint for Each Hindu and English Month', yaxis_title='Mean Deviation')
fig.show()


In [13]:
# Extract data for Hindi and English months
hindi_months = ['', 'Chaitra', 'Vaishakha', 'Jyeshtha', 'Ashadha', 'Shravana', 'Bhadra', 'Ashwina', 'Kartika', 'Agrahayana', 'Pausa', 'Magha', 'Phalguna']
english_months = [" ", "April", "May", "June", "July", "August", "September",    "October", "November", "December", "January", "February", "March"]

data = []
for year_data_hindu, year_data_english in zip(months_deviation_hindu, months_deviation_english):
    year_hindu = list(year_data_hindu.keys())[0]
    year_english = list(year_data_english.keys())[0]
    deviation_hindu = year_data_hindu[year_hindu]
    deviation_english = year_data_english[year_english]
    for i, (dev_hindu, dev_english) in enumerate(zip(deviation_hindu, deviation_english)):
        month_hindu = hindi_months[i+1]
        month_english = english_months[i+1]
        data.append(go.Bar(name=f'{month_hindu} {year_hindu}', x=[f'{month_hindu} {year_hindu}', f'{month_english} {year_english}'], y=[dev_hindu, dev_english]))

layout = go.Layout(barmode='group', title='Dewpoint Deviation')
fig = go.Figure(data=data, layout=layout)
fig.show()


In [14]:
# Compute average deviation for each month across all years in months_deviation_hindu

hindu_months = ['Chaitra', 'Vaishakha', 'Jyeshtha', 'Ashadha', 'Shravana', 'Bhadra', 'Ashwina', 'Kartika', 'Agrahayana', 'Pausa', 'Magha', 'Phalguna']
english_months = ["April", "May", "June", "July", "August", "September",    "October", "November", "December", "January", "February", "March"]

average_months_deviation_hindu = {}
for year_data in months_deviation_hindu:
    year = list(year_data.keys())[0]
    for month_num, deviation in enumerate(year_data[year]):
        month_name = hindu_months[month_num]
        if month_name in average_months_deviation_hindu:
            average_months_deviation_hindu[month_name] += deviation
        else:
            average_months_deviation_hindu[month_name] = deviation

for month_name in average_months_deviation_hindu:
    average_months_deviation_hindu[month_name] /= len(months_deviation_hindu)

# Compute average deviation for each month across all years in months_deviation_english
average_months_deviation_english = {}
for year_data in months_deviation_english:
    year = list(year_data.keys())[0]
    for month_num, deviation in enumerate(year_data[year]):
        month_name = english_months[month_num]
        if month_name in average_months_deviation_english:
            average_months_deviation_english[month_name] += deviation
        else:
            average_months_deviation_english[month_name] = deviation

for month_name in average_months_deviation_english:
    average_months_deviation_english[month_name] /= len(months_deviation_english)

average_months_deviation_english


{'April': 2.1505264855416706,
 'May': 3.0215097720184723,
 'June': 1.9686737080967358,
 'July': 3.595487430392358,
 'August': 4.472985621600279,
 'September': 2.2743037444904295,
 'October': 1.0232331819437241,
 'November': 0.8405943278360236,
 'December': 1.7118844766217425,
 'January': 2.9694357855928235,
 'February': 2.452658696472636,
 'March': 2.6970079739711803}

In [15]:
import plotly.graph_objs as go

hindu_months = ["Chaitra", "Vaishakha", "Jyeshtha", "Ashadha", "Shravana", "Bhadra", "Ashwina", "Kartika", "Agrahayana", "Pausa", "Magha", "Phalguna"]
english_months = ["April", "May", "June", "July", "August", "September", "October", "November", "December", "January", "February", "March"]

data = []
for hindu_month, english_month in zip(hindu_months, english_months):
    data.append(go.Bar(name=hindu_month, x=[english_month], y=[average_months_deviation_hindu[hindu_month]]))
    data.append(go.Bar(name=english_month, x=[english_month], y=[average_months_deviation_english[english_month]]))

layout = go.Layout(barmode='group')
fig = go.Figure(data=data, layout=layout)
fig.show()

