In [2]:
import pandas as pd
import numpy as np

In [3]:
# Load the five filtered measurement tables, one CSV per variable.
# The paths are read in the order listed and bound to the names on the left.
precip, moisture, pression, temperature, wind = (
    pd.read_csv(csv_path)
    for csv_path in (
        "data/filtered/precipitation_filter.csv",
        "data/filtered/moisture_filter.csv",
        "data/filtered/pression_filter.csv",
        "data/filtered/temperature_filter.csv",
        "data/filtered/wind_vectors_filter.csv",
    )
)

In [3]:
# Ten highest precipitation readings (each reading covers 10 minutes).
# nlargest picks the top rows without sorting the whole frame, returns them
# in descending order, and keeps tied rows in their original row order.
precip.nlargest(10, 'precipitation')

Unnamed: 0,station,time,precipitation
4102375,VSARO,202205200510,44.6
58746977,SOG,201907261440,37.3
4129790,VSARO,202211261420,36.4
4129835,VSARO,202211262150,36.4
20116705,EBK,202208261720,34.5
44836601,MSG,202205140200,32.9
67638646,WAG,202107122350,31.1
58058385,SIH,202106211720,30.5
58904677,SOG,202207251800,30.2
3796297,APP,202107241610,30.1


In [8]:
# Find the longest uninterrupted runs of zero precipitation per station and
# print the top five as a LaTeX table.
# NOTE(review): the run detection relies on rows being ordered by station and
# then by time — confirm that the input CSV is written that way.

# Work on a copy so the helper columns are not added to `precip`.
precip_copy = precip.copy()

# 1 where the reading is exactly zero, 0 otherwise.
precip_copy['dry'] = precip_copy['precipitation'].eq(0).astype(int)

# A new run starts wherever the dry flag changes within a station. The first
# row of each station has a NaN diff, and NaN != 0, so it also opens a run.
# The cumulative sum turns those run starts into a running group id.
run_starts = precip_copy.groupby('station')['dry'].diff().ne(0)
precip_copy['dry_group'] = run_starts.cumsum()

# Keep only the dry rows, then summarise each run by its first timestamp,
# last timestamp and number of readings.
dry_rows = precip_copy.loc[precip_copy['dry'].eq(1)]
dry_periods = (
    dry_rows
    .groupby(['station', 'dry_group'])['time']
    .agg(['min', 'max', 'count'])
    .reset_index()
)

# Longest runs (by number of readings) first.
output = dry_periods.sort_values(by='count', ascending=False)

latex_table = output.head(5).to_latex(
    index=False,
    caption="Top 5 Longest Dry Periods",
    label="tab:dry_periods",
)
print(latex_table)

\begin{table}
\caption{Top 5 Longest Dry Periods}
\label{tab:dry_periods}
\begin{tabular}{lrrrr}
\toprule
station & dry_group & min & max & count \\
\midrule
GVE & 1006049 & 202003121950 & 202004251920 & 6334 \\
COU & 690669 & 202003130220 & 202004251330 & 6260 \\
LUG & 1573187 & 202201050720 & 202202141740 & 5823 \\
COL & 653116 & 202201050600 & 202202141540 & 5819 \\
COL & 653140 & 202202182350 & 202203300530 & 5651 \\
\bottomrule
\end{tabular}
\end{table}



In [9]:
# Per-station precipitation totals: report the wettest and driest station.
precip_sum = precip.groupby('station')[['precipitation']].sum()
totals_ranked = precip_sum.sort_values(by='precipitation', ascending=False)

print("Station with the highest total precipitation:")
print(totals_ranked.head(1))

print("Station with the lowest total precipitation:")
print(totals_ranked.tail(1))

# Per-station mean precipitation per reading.
# `precip_sum` is rebound to the mean here, so after this cell it holds the
# averages, not the totals.
precip_sum = precip.groupby('station')[['precipitation']].mean()
means_ranked = precip_sum.sort_values(by='precipitation', ascending=False)

print("Station with the highest average precipitation:")
print(means_ranked.head(1))

print("Station with the lowest average precipitation:")
print(means_ranked.tail(1))

Station with the highest total precipitation:
         precipitation
station               
SAE            16475.4
Station with the lowest total precipitation:
         precipitation
station               
SNG             1299.6
Station with the highest average precipitation:
         precipitation
station               
SAE           0.062657
Station with the lowest average precipitation:
         precipitation
station               
VSMAT         0.010165


In [5]:
# Ten highest temperature readings on record.
# nlargest picks the top rows without sorting the whole frame, returns them
# in descending order, and keeps tied rows in their original row order.
temperature.nlargest(10, 'temperature')

Unnamed: 0,station,time,temperature
11550896,GVE,202308241320,39.3
11550901,GVE,202308241410,39.1
11550897,GVE,202308241330,38.7
11550902,GVE,202308241420,38.7
11550899,GVE,202308241350,38.7
11550900,GVE,202308241400,38.5
22594549,CGI,202308241410,38.5
11550898,GVE,202308241340,38.4
11550891,GVE,202308241230,38.4
22594548,CGI,202308241400,38.4


In [6]:
# Ten lowest temperature readings on record.
# nsmallest picks the bottom rows without sorting the whole frame, returns
# them in ascending order, and keeps tied rows in their original row order.
temperature.nsmallest(10, 'temperature')

Unnamed: 0,station,time,temperature
26461989,SAM,202102140330,-30.5
4318737,BUF,202102140530,-29.7
26457094,SAM,202101110340,-29.6
26462010,SAM,202102140700,-29.5
4318745,BUF,202102140650,-29.5
4318744,BUF,202102140640,-29.5
4318717,BUF,202102140210,-29.4
26462000,SAM,202102140520,-29.4
4318734,BUF,202102140500,-29.3
26461998,SAM,202102140500,-29.2


In [None]:
# Build one table holding every measured variable for each (time, station).

# Index each frame by its parsed timestamp; the raw 'time' values look like
# YYYYmmddHHMM (e.g. 202205200510). The resulting index is named 'time'.
for frame in (temperature, precip, wind, moisture, pression):
    frame.index = pd.to_datetime(frame['time'], format='%Y%m%d%H%M')

# Join on (time, station), not on time alone. Each frame holds many stations
# per timestamp, so a time-only join pairs every precipitation row with the
# readings of every station at that timestamp: the row count explodes and
# values from different stations end up on the same row.
# NOTE(review): assumes wind, moisture and pression carry a 'station' column
# like precip and temperature do — confirm against the CSV headers.
merged_data = precip[['station', 'precipitation']].set_index('station', append=True)

for frame, value_cols in (
    (temperature, ['temperature']),
    (wind, ['North', 'East']),
    (moisture, ['moisture']),
    (pression, ['pression']),
):
    keyed = frame[['station', *value_cols]].set_index('station', append=True)
    # Left join keeps every precipitation row; variables with no reading for
    # that (time, station) pair are left as NaN.
    merged_data = merged_data.merge(keyed, left_index=True, right_index=True, how='left')

# Back to a plain time index, with the station available as a regular column.
merged_data = merged_data.reset_index('station')

: 