In [1]:
import pandas as pd
import os
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 60)
pd.__version__
import re

import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
def read_csv(filename):
    '''
    Reads one CSV file into a dataframe, best-effort.

    The file is parsed with header=1: the second line of the file holds the
    column names and the first line is skipped.

    Parameters
    ----------
    filename : str or path-like
        Location of the CSV file.

    Returns
    -------
    pandas.DataFrame or None
        The parsed data, or None when the file could not be read. A failure
        is reported on stdout instead of being raised, so a caller looping
        over many files can skip the bad ones with an `is None` check.
    '''
    try:
        return pd.read_csv(filename, header=1)
    except Exception as e:
        # deliberately broad: any read or parse problem is reported, not raised
        print(f"cannot read {filename}. Error: {e}")
        # None instead of a placeholder string, so a failed read can never be
        # mistaken for data further down the notebook
        return None

In [3]:
def read_all_csv(path):
    '''
    Reads every CSV file in a folder and stacks them into one dataframe.

    Each file is read with pandas' default settings and gets an extra
    'file_name' column holding the name of the file it came from, so every
    row stays traceable after concatenation. No columns are dropped and no
    values are converted here.

    Parameters
    ----------
    path : str
        Folder that contains the CSV files. Only names ending in '.csv'
        are read; sub-folders are not searched.

    Returns
    -------
    pandas.DataFrame
        All rows of all files, in file-name order, on a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If the folder contains no file whose name ends in '.csv'.
    '''
    # os.listdir gives no ordering guarantee; sorting keeps the row order
    # (and therefore head()/tail() output) the same on every run and machine
    csv_files = sorted(f for f in os.listdir(path) if f.endswith('.csv'))

    if not csv_files:
        # pd.concat would otherwise fail with a generic
        # "No objects to concatenate" that does not mention the folder
        raise ValueError(f"no .csv files found in {path}")

    # this is where we keep the dataframes as we read them
    list_of_dfs = []

    for file in csv_files:
        # os.path.join uses the path separator of the running OS
        df = pd.read_csv(os.path.join(path, file))
        # remember which file the rows came from
        df['file_name'] = file
        list_of_dfs.append(df)

    # concatenate all dataframes and renumber the rows
    return pd.concat(list_of_dfs).reset_index(drop=True)

In [4]:
def date_to_year(df):
    '''
    Reduces the monthly records to the October rows, labelled by year.

    The 'DATA' column is parsed with the format '%b%Y' (abbreviated month
    name followed directly by a four-digit year). Rows of any month other
    than October are discarded. The result has only the columns 'year',
    'SPEI_12' and 'file_name' on a fresh 0..n-1 index; the dataframe passed
    in is left untouched.
    '''
    # parse the date strings and flag the rows that fall in October
    is_october = pd.to_datetime(df['DATA'], format='%b%Y').dt.month == 10
    october = df[is_october].copy()

    # calendar year of each remaining row
    october['year'] = pd.to_datetime(october['DATA'], format='%b%Y').dt.year

    # only the columns used further on, renumbered from zero
    wanted = ['year', 'SPEI_12', 'file_name']
    return october[wanted].reset_index(drop=True).copy()

In [5]:
def extract_coordinates(df):
    '''
    Extracts the coordinates embedded in the 'file_name' column.

    The file name is expected to contain two decimal numbers joined by an
    underscore (for example '38.75_24.75'): the first becomes 'lat', the
    second 'long'. A name that does not match yields NaN for that value.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'year', 'SPEI_12' and 'file_name'.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new dataframe with the columns 'year', 'SPEI_12', 'lat', 'long'
        (coordinates as floats), on the same index as the input.
    '''
    names = df['file_name']

    # expand=False returns a Series (one capture group) rather than a
    # one-column dataframe
    lat = names.str.extract(r'(\d+[.]\d+)_\d+[.]', expand=False).astype(float)
    long = names.str.extract(r'\d+[.]\d+_(\d+[.]\d+)', expand=False).astype(float)

    # assign() builds a new frame, so the caller's dataframe does not gain
    # 'lat'/'long' columns as a side effect
    return df[['year', 'SPEI_12']].assign(lat=lat, long=long)

In [6]:
def find_stats(df):
    '''
    Summarises SPEI_12 per year.

    Returns a new dataframe with one row per distinct 'year' and the columns
    'year', 'SPEI_12median' and 'SPEI_12mean', on a 0..n-1 index.
    '''
    # named aggregation produces the flat column names directly
    per_year = df.groupby('year').agg(
        SPEI_12median=('SPEI_12', 'median'),
        SPEI_12mean=('SPEI_12', 'mean'),
    )

    # turn the 'year' group key back into an ordinary column
    return per_year.reset_index()

In [7]:
# Load the Greece-wide files, keep the October rows and add the coordinates
# NOTE(review): absolute path into one user's Downloads folder
gr_df = (
    read_all_csv("C:\\Users\\c.marinou\\Downloads\\Droughts2024\\Greece_2024")
    .pipe(date_to_year)
    .pipe(extract_coordinates)
)
gr_df.tail()

In [11]:
# Median and mean SPEI_12 per year over all the Greece rows
gr_stats = find_stats(gr_df)

In [13]:
# Have drought levels decreased or increased in recent years? By how much?

# change of the median from the previous row, i.e. from the previous year
gr_stats = gr_stats.assign(value_dif=gr_stats['SPEI_12median'].diff())
gr_stats

Unnamed: 0,year,SPEI_12median,SPEI_12mean,value_dif
0,1950,,,
1,1951,0.886975,0.729504,
2,1952,-0.707055,-0.669552,-1.59403
3,1953,1.03711,1.040955,1.744165
4,1954,0.455455,0.504595,-0.581655
5,1955,1.23527,1.013783,0.779815
6,1956,0.27958,0.292067,-0.95569
7,1957,0.32035,0.298967,0.04077
8,1958,-0.55895,-0.567785,-0.8793
9,1959,-0.01251,0.225995,0.54644


In [21]:
# Save the yearly Greece statistics next to the notebook (relative path).
# With pandas' defaults the row index is written too, as an unnamed first column.
gr_stats.to_csv("greece_drought.csv")

In [17]:
# Same for Eastern Peloponnisos: read and stack every CSV file of the region
# NOTE(review): absolute path into one user's Downloads folder
pelop_df = read_all_csv("C:\\Users\\c.marinou\\Downloads\\Droughts2024\\EastPelop_2024")

In [18]:
# Keep only the October rows of Eastern Peloponnisos, labelled by year
# NOTE(review): unlike the other regions, no extract_coordinates / find_stats
# step for pelop_df is visible here - confirm whether one is missing
pelop_df = date_to_year(pelop_df)

In [17]:
# Load Evoia and Attiki as one region: the two areas are too small to be
# analysed separately
evoia_attiki_df = (
    read_all_csv("C:\\Users\\c.marinou\\Downloads\\Droughts2024\\Attiki_Evoia")
    .pipe(date_to_year)
    .pipe(extract_coordinates)
)
evoia_attiki_df.tail()

Unnamed: 0,year,SPEI_12,lat,long
1105,2019,0.85162,38.75,24.75
1106,2020,0.66497,38.75,24.75
1107,2021,-0.77869,38.75,24.75
1108,2022,-0.29722,38.75,24.75
1109,2023,-0.93529,38.75,24.75


In [18]:
# Yearly median/mean for Evoia-Attiki, plus the change of the median from
# the previous year
evoia_attiki_stats = find_stats(evoia_attiki_df).assign(
    value_dif=lambda stats: stats['SPEI_12median'].diff()
)
evoia_attiki_stats

Unnamed: 0,year,SPEI_12median,SPEI_12mean,value_dif
0,1950,,,
1,1951,0.25173,0.437359,
2,1952,-0.88382,-0.822791,-1.13555
3,1953,1.72418,1.757863,2.608
4,1954,0.46672,0.244596,-1.25746
5,1955,1.77302,1.528696,1.3063
6,1956,-0.15,-0.104391,-1.92302
7,1957,-0.26011,-0.317183,-0.11011
8,1958,-0.65652,-0.622297,-0.39641
9,1959,-0.1755,-0.166259,0.48102


In [19]:
# Uploading the df for Evros
evros_df = read_all_csv("C:\\Users\\c.marinou\\Downloads\\Droughts2024\\Evros")
evros_df = date_to_year(evros_df)
evros_df = extract_coordinates(evros_df) 
evros_df.tail()

Unnamed: 0,year,SPEI_12,lat,long
1327,2019,-0.25396,41.75,26.75
1328,2020,-1.41438,41.75,26.75
1329,2021,1.40266,41.75,26.75
1330,2022,-0.62418,41.75,26.75
1331,2023,-1.93421,41.75,26.75


In [20]:
# Yearly median/mean for Evros, plus the change of the median from the
# previous year
# NOTE(review): the output saved under this cell repeats the Evoia-Attiki
# figures exactly - re-run to check that it is not stale
evros_stats = find_stats(evros_df).assign(
    value_dif=lambda stats: stats['SPEI_12median'].diff()
)
evros_stats

Unnamed: 0,year,SPEI_12median,SPEI_12mean,value_dif
0,1950,,,
1,1951,0.25173,0.437359,
2,1952,-0.88382,-0.822791,-1.13555
3,1953,1.72418,1.757863,2.608
4,1954,0.46672,0.244596,-1.25746
5,1955,1.77302,1.528696,1.3063
6,1956,-0.15,-0.104391,-1.92302
7,1957,-0.26011,-0.317183,-0.11011
8,1958,-0.65652,-0.622297,-0.39641
9,1959,-0.1755,-0.166259,0.48102


In [21]:
# Uploading the df for Crete
crete_df = read_all_csv("C:\\Users\\c.marinou\\Downloads\\Droughts2024\\Crete")
crete_df = date_to_year(crete_df)
crete_df = extract_coordinates(crete_df) 
crete_df.tail()

Unnamed: 0,year,SPEI_12,lat,long
1327,2019,1.6061,35.75,26.25
1328,2020,0.32168,35.75,26.25
1329,2021,-2.00548,35.75,26.25
1330,2022,0.46862,35.75,26.25
1331,2023,-2.90311,35.75,26.25


In [22]:
# Yearly median/mean for Crete, plus the change of the median from the
# previous year
# NOTE(review): the output saved under this cell repeats the Evoia-Attiki
# figures exactly - re-run to check that it is not stale
crete_stats = find_stats(crete_df).assign(
    value_dif=lambda stats: stats['SPEI_12median'].diff()
)
crete_stats

Unnamed: 0,year,SPEI_12median,SPEI_12mean,value_dif
0,1950,,,
1,1951,0.25173,0.437359,
2,1952,-0.88382,-0.822791,-1.13555
3,1953,1.72418,1.757863,2.608
4,1954,0.46672,0.244596,-1.25746
5,1955,1.77302,1.528696,1.3063
6,1956,-0.15,-0.104391,-1.92302
7,1957,-0.26011,-0.317183,-0.11011
8,1958,-0.65652,-0.622297,-0.39641
9,1959,-0.1755,-0.166259,0.48102
