In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [9]:
# Loading dataset
df = pd.read_csv("Argentina_weather_dataset.csv")

In [15]:
df.head(5)

Unnamed: 0,Country,Date,Temp_Max,Temp_Min,Temp_Mean,Precipitation_Sum,Windspeed_Max,Windgusts_Max,Sunshine_Duration
0,Argentina,01-01-2000,33.9,20.7,27.5,0.0,17.9,32.8,49047.75
1,Argentina,02-01-2000,32.9,22.2,27.3,0.6,23.1,43.6,48969.22
2,Argentina,03-01-2000,32.1,21.0,26.9,0.0,20.6,40.7,48921.57
3,Argentina,04-01-2000,32.2,20.9,26.7,0.0,25.1,47.2,48091.2
4,Argentina,05-01-2000,30.5,21.0,25.7,0.0,23.7,45.4,48922.09


In [17]:
print(df.info())
print(df.describe())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8766 entries, 0 to 8765
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Country            8766 non-null   object 
 1   Date               8766 non-null   object 
 2   Temp_Max           8766 non-null   float64
 3   Temp_Min           8766 non-null   float64
 4   Temp_Mean          8766 non-null   float64
 5   Precipitation_Sum  8766 non-null   float64
 6   Windspeed_Max      8766 non-null   float64
 7   Windgusts_Max      8766 non-null   float64
 8   Sunshine_Duration  8766 non-null   float64
dtypes: float64(7), object(2)
memory usage: 616.5+ KB
None
          Temp_Max     Temp_Min    Temp_Mean  Precipitation_Sum  \
count  8766.000000  8766.000000  8766.000000        8766.000000   
mean     21.448517    10.953091    15.938182           1.491901   
std       7.252082     6.082416     6.583756           5.078362   
min       4.000000    -5.900000    -1.4

In [19]:
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

In [21]:
# Check missing values
print(df.isnull().sum())

Country                 0
Date                 5310
Temp_Max                0
Temp_Min                0
Temp_Mean               0
Precipitation_Sum       0
Windspeed_Max           0
Windgusts_Max           0
Sunshine_Duration       0
dtype: int64


In [25]:
# Fill or drop missing values
df = df.dropna() 

In [31]:
if 'Date' in df.columns and 'Temperature' in df.columns:
    plt.figure(figsize=(12,6))
    plt.plot(df['Date'], df['Temperature'], color='red')
    plt.title("Temperature Trend Over Time")
    plt.xlabel("Date")
    plt.ylabel("Temperature (°C)")
    plt.show()

In [33]:
if 'Date' in df.columns and 'Rainfall' in df.columns:
    df['Month'] = df['Date'].dt.month
    monthly_rainfall = df.groupby('Month')['Rainfall'].mean()

    plt.figure(figsize=(10,5))
    monthly_rainfall.plot(kind='bar', color='blue')
    plt.title("Average Monthly Rainfall")
    plt.xlabel("Month")
    plt.ylabel("Rainfall (mm)")
    plt.show()


In [35]:
if 'WindSpeed' in df.columns and 'Rainfall' in df.columns:
    plt.figure(figsize=(8,6))
    sns.scatterplot(x='WindSpeed', y='Rainfall', data=df, hue='Temperature', palette="coolwarm")
    plt.title("Wind Speed vs Rainfall (colored by Temperature)")
    plt.show()


In [37]:
if 'Temperature' in df.columns:
    print("\nAverage Temperature:", df['Temperature'].mean())
    print("Max Temperature:", df['Temperature'].max())
    print("Min Temperature:", df['Temperature'].min())

if 'Rainfall' in df.columns:
    print("\nAverage Rainfall:", df['Rainfall'].mean())
    print("Max Rainfall:", df['Rainfall'].max())
    print("Min Rainfall:", df['Rainfall'].min())

if 'Humidity' in df.columns:
    print("\nAverage Humidity:", df['Humidity'].mean())

# Step 7: Identify Extremes
if 'Temperature' in df.columns:
    print("\nHottest Day Record:")
    print(df.loc[df['Temperature'].idxmax()])

if 'Rainfall' in df.columns:
    print("\nRainiest Day Record:")
    print(df.loc[df['Rainfall'].idxmax()])