## Merging dataframes

In [8]:
# Read both input CSVs from the working directory: the cleaned news
# article counts first, then the weather measurements.
news_df, weather_df = (
    pd.read_csv(csv_name)
    for csv_name in ('News_data.csv', 'Weather_data.csv')
)

In [9]:
# Show the news frame as the cell output (one row per 'month' with its
# 'article count').
news_df

Unnamed: 0,month,article count
0,2011-01,49
1,2011-02,53
2,2011-03,37
3,2011-04,44
4,2011-05,52
...,...,...
147,2023-04,68
148,2023-05,82
149,2023-06,71
150,2023-07,66


In [10]:
# Show the weather frame as the cell output (one row per 'month' with the
# measured values and their anomaly columns).
weather_df

Unnamed: 0,month,mean temp,mean daily max temp,mean daily min temp,mean wind speed,accumulated precipitation,mean temp anomaly,mean daily max temp anomaly,mean daily min temp anomaly,mean wind speed anomaly,accumulated precipitation anomaly
0,2011-01,0.3,2.6,,4.2,47.8,-1.8,-2.0,,-1.1,-21.5
1,2011-02,-0.1,1.6,,6.5,39.9,-2.1,-3.1,,1.1,-12.7
2,2011-03,3.0,,,5.1,29.9,-0.7,,,0.2,-13.5
3,2011-04,9.9,,,4.7,17.4,2.7,,,-0.0,-19.5
4,2011-05,11.3,,,4.9,54.4,-0.2,,,0.5,5.2
...,...,...,...,...,...,...,...,...,...,...,...
146,2023-03,3.5,6.8,0.2,4.7,78.9,-0.2,-0.1,0.0,-0.2,35.5
147,2023-04,7.0,11.2,2.9,4.5,43.8,-0.2,-0.5,0.1,-0.2,6.9
148,2023-05,11.2,16.0,6.4,4.6,14.1,-0.3,0.3,-0.4,0.2,-35.1
149,2023-06,16.4,21.9,10.7,3.6,27.0,1.3,1.1,-0.5,-0.6,-33.4


In [15]:
# Left-join the article counts (news_df) onto the weather rows (weather_df)
# by 'month': every weather month is kept, and a weather month without a
# matching news row ends up with a missing 'article count'.
# Those missing counts are then replaced by 0 and the column is cast to int.
merged_df = (
    pd.merge(weather_df, news_df, how='left', on='month')
    .assign(**{
        'article count': lambda joined: joined['article count'].fillna(0).astype(int)
    })
)

# Optionally persist the result instead of only displaying it:
# merged_df.to_csv('Final_data.csv', index=False)
merged_df


Unnamed: 0,month,mean temp,mean daily max temp,mean daily min temp,mean wind speed,accumulated precipitation,mean temp anomaly,mean daily max temp anomaly,mean daily min temp anomaly,mean wind speed anomaly,accumulated precipitation anomaly,article count
0,2011-01,0.3,2.6,,4.2,47.8,-1.8,-2.0,,-1.1,-21.5,49
1,2011-02,-0.1,1.6,,6.5,39.9,-2.1,-3.1,,1.1,-12.7,53
2,2011-03,3.0,,,5.1,29.9,-0.7,,,0.2,-13.5,37
3,2011-04,9.9,,,4.7,17.4,2.7,,,-0.0,-19.5,44
4,2011-05,11.3,,,4.9,54.4,-0.2,,,0.5,5.2,52
...,...,...,...,...,...,...,...,...,...,...,...,...
146,2023-03,3.5,6.8,0.2,4.7,78.9,-0.2,-0.1,0.0,-0.2,35.5,59
147,2023-04,7.0,11.2,2.9,4.5,43.8,-0.2,-0.5,0.1,-0.2,6.9,68
148,2023-05,11.2,16.0,6.4,4.6,14.1,-0.3,0.3,-0.4,0.2,-35.1,82
149,2023-06,16.4,21.9,10.7,3.6,27.0,1.3,1.1,-0.5,-0.6,-33.4,71


## Visualization

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [None]:
# RGBA colour assigned to each calendar month, keyed by the zero-padded
# month number ("01" .. "12"). The tuples below are listed in calendar order
# and the keys are generated from their position.
months_colors = {
    f"{month_number:02d}": rgba
    for month_number, rgba in enumerate(
        [
            (0, 0, 0.5, 1),     # 01: dark blue
            (0, 0, 1, 1),       # 02: medium blue
            (0, 0.6, 0.75, 1),  # 03: light blue
            (0, 0.8, 0, 1),     # 04: light green
            (0, 0.6, 0, 1),     # 05: medium green
            (0.9, 0, 0, 0),     # 06: red with alpha channel 0
            (0.9, 0, 0, 0),     # 07: red with alpha channel 0
            (0.8, 0, 0, 1),     # 08: medium red
            (0.9, 0.5, 0, 1),   # 09: light orange
            (0.9, 0.4, 0, 1),   # 10: medium orange
            (0.9, 0.3, 0, 1),   # 11: dark orange
            (0, 0, 1, 1),       # 12: blue, same RGBA as 02
        ],
        start=1,
    )
}

# Draw one box plot per anomaly column, grouping the rows by calendar month.
#
# The month label is the last two characters of the 'month' strings
# (presumably 'YYYY-MM', giving "01" .. "12" -- TODO confirm for
# df_combined_all_parameters). It is computed once because it does not change
# between iterations.
month_labels = df_combined_all_parameters['month'].str[-2:]

# Explicit category order: without it seaborn orders the boxes by first
# appearance in the frame, so the x-axis would depend on row order.
month_order = sorted(month_labels.dropna().unique())

for param in anomaly_parameters:
    fig, ax = plt.subplots(figsize=(15, 6))

    # Boxplot for each anomaly parameter.
    # The month labels are assigned to both `x` and `hue` so that `palette`
    # is applied through a hue mapping (passing `palette` without `hue` is
    # deprecated in seaborn 0.13). `dodge=False` keeps a single full-width
    # box per month instead of offsetting boxes by hue level.
    sns.boxplot(data=df_combined_all_parameters,
                x=month_labels,
                y=param,
                hue=month_labels,
                order=month_order,
                hue_order=month_order,
                palette=months_colors,
                dodge=False,
                width=0.6,
                boxprops=dict(alpha=.8),
                medianprops=dict(color='black', linewidth=2.5),
                whiskerprops=dict(color='gray', linewidth=1.2),
                capprops=dict(color='gray'),
                ax=ax)

    # The hue legend would only repeat the x tick labels; drop it if seaborn
    # created one (whether it does depends on the seaborn version).
    hue_legend = ax.get_legend()
    if hue_legend is not None:
        hue_legend.remove()

    # Titles and labels.
    ax.set_title(f'{param.capitalize()} by month', fontsize=18, fontweight='bold', pad=20)
    ax.set_xlabel('Month', fontsize=14, labelpad=12)
    ax.set_ylabel(param.capitalize(), fontsize=14, labelpad=12)
    ax.tick_params(axis='both', labelsize=12)

    # Grid and borders: hide all four spines, keep a light horizontal grid.
    sns.despine(ax=ax, top=True, right=True, left=True, bottom=True)
    ax.grid(color='gray', linestyle='--', linewidth=0.5, axis='y', alpha=0.6)

    fig.tight_layout()
    plt.show()
    # Release the figure so a long list of parameters does not keep every
    # figure alive in the kernel.
    plt.close(fig)