# Wind Statistics

### Step 1: Import the necessary libraries

In [None]:
# Install necessary dependencies
# !pip install pandas numpy

import pandas as pd
import numpy as np

### Step 2: Import the dataset from the given address

In [None]:
# NOTE(review): the later steps expect 'Yr', 'Mo' and 'Dy' columns in a
# whitespace-delimited file -- confirm this URL actually serves that data.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-wind-speed.csv"
# sep=r'\s+' is the documented equivalent of delim_whitespace=True, which
# pandas deprecated in 2.2; this spelling works on old and new versions alike.
data = pd.read_csv(url, sep=r'\s+', na_values='NaN')

### Step 3: Assign it to a variable called `data` and replace the first 3 columns with a proper datetime index

In [None]:
# Combine the Yr/Mo/Dy columns into a single timestamp per row and use it as
# the index. pd.to_datetime assembles dates from a frame whose columns are
# named year/month/day, hence the rename.
# NOTE(review): if Yr holds two-digit years (as the "2061" fix in Step 4
# suggests), this assembly may not behave as intended -- confirm against the file.
ymd_names = {'Yr': 'year', 'Mo': 'month', 'Dy': 'day'}
date_parts = data[list(ymd_names)].rename(columns=ymd_names)
data['date'] = pd.to_datetime(date_parts)
# The source columns are redundant once the combined date exists.
data.drop(columns=list(ymd_names), inplace=True)
data.set_index('date', inplace=True)

### Step 4: Fix Year 2061 by creating a function to correct it

In [None]:
def fix_years(year):
    """Return ``year`` moved back one century when it is later than 2000.

    Years up to and including 2000 are returned unchanged.
    """
    if year > 2000:
        return year - 100
    return year

# Rebuild the index one timestamp at a time, swapping in the year returned by
# fix_years and leaving every other field of the timestamp unchanged.
data.index = data.index.map(
    lambda stamp: stamp.replace(year=fix_years(stamp.year))
)

### Step 5: Ensure the index is of type `datetime64[ns]`

In [None]:
# Actually enforce the datetime index rather than only inspecting it:
# pd.to_datetime leaves an existing DatetimeIndex as it is and converts
# anything else (e.g. an object-dtype index of timestamps).
data.index = pd.to_datetime(data.index)
print(data.index.dtype)  # Verify the index type

### Step 6: Compute how many values are missing for each location over the entire record

In [None]:
# isnull() flags every NaN cell; summing the flags column by column gives the
# number of missing readings per column.
missing_values = data.isnull().sum()
print(missing_values)

### Step 7: Compute how many non-missing values there are in total

In [None]:
# count() tallies the non-NaN cells in each column; adding those tallies
# together yields the total number of recorded values in the frame.
non_missing_values = data.count().sum()
print(non_missing_values)

### Step 8: Calculate the mean windspeed over all locations and times (a single number)

In [None]:
# Pool every observation into a single mean. Averaging the per-column means
# (data.mean().mean()) weights each column equally, which skews the result
# whenever columns have different numbers of missing readings.
# sum() skips NaN and count() counts only non-NaN cells, so the ratio is the
# mean over exactly the values that were recorded.
overall_mean = data.sum().sum() / data.count().sum()
print(overall_mean)

### Step 9: Create a DataFrame called `loc_stats` with min, max, mean, and standard deviation for each location

In [None]:
# describe() produces one row per statistic; keep the four of interest and
# transpose so that each original column becomes a row.
loc_stats = data.describe().loc[['min', 'max', 'mean', 'std']].transpose()
print(loc_stats)

### Step 10: Create a DataFrame called `day_stats` with min, max, mean, and standard deviation for each day

In [None]:
# Row-wise (axis=1) statistics: each reduction collapses the columns into one
# value per index entry, and the four results are placed side by side.
day_stats = pd.concat(
    [
        data.min(axis=1),
        data.max(axis=1),
        data.mean(axis=1),
        data.std(axis=1),
    ],
    axis=1,
    keys=['min', 'max', 'mean', 'std'],
)
print(day_stats)

### Step 11: Average windspeed in January for each location

In [None]:
# Select the rows whose index month is 1 (January, in any year) and average
# each column over them.
is_january = data.index.month == 1
january_avg = data.loc[is_january].mean()
print(january_avg)

### Step 12: Downsample the record to a yearly frequency

In [None]:
# Group the rows into calendar-year bins and take the mean of each bin.
# NOTE(review): pandas 2.2+ deprecates the 'Y' alias in favour of 'YE';
# revisit the alias when upgrading.
by_year = data.resample('Y')
yearly_data = by_year.mean()
print(yearly_data)

### Step 13: Downsample the record to a monthly frequency

In [None]:
# Group the rows into calendar-month bins and take the mean of each bin.
# NOTE(review): pandas 2.2+ deprecates the 'M' alias in favour of 'ME';
# revisit the alias when upgrading.
by_month = data.resample('M')
monthly_data = by_month.mean()
print(monthly_data)

### Step 14: Downsample the record to a weekly frequency

In [None]:
# Group the rows into weekly bins ('W') and take the mean of each bin.
by_week = data.resample('W')
weekly_data = by_week.mean()
print(weekly_data)

### Step 15: Calculate min, max, mean, and standard deviation of the weekly mean windspeeds across all locations for each of the first 52 weeks

In [None]:
# Restrict to the first 52 weekly rows, then summarise each row across its
# columns (axis=1) with the four statistics.
first_52_weeks = weekly_data.iloc[:52]
weekly_stats = first_52_weeks.agg(['min', 'max', 'mean', 'std'], axis=1)
print(weekly_stats)