# Wind Statistics 
The data have been modified to contain some missing values, identified by NaN.
Using pandas should make this exercise easier, in particular for the bonus question.
You should be able to perform all of these operations without using a for loop or 
other looping construct.
The data in 'wind.data' has the following format:
Yr Mo Dy RPT VAL ROS KIL SHA BIR DUB CLA MUL CLO BEL MAL
61 1 1 15.04 14.96 13.17 9.29 NaN 9.87 13.67 10.25 10.83 12.58 18.50 15.04
61 1 2 14.71 NaN 10.83 6.50 12.62 7.67 11.50 10.04 9.79 9.67 17.54 13.83
61 1 3 18.50 16.88 12.33 10.13 11.17 6.17 11.25 NaN 8.50 7.67 12.75 12.71

The first three columns are year, month, and day. The remaining 12 columns are average windspeeds in knots at 12 locations in Ireland on that day.

In [None]:
import pandas as pd

def fix_year(x):
    """Return the date *x* moved back one century when its year is after 2000.

    The loader expands two-digit years with a "20" prefix before parsing, so
    a 1961 record arrives here as 2061; this undoes that shift.

    Args:
        x: Object exposing integer ``year``, ``month`` and ``day`` attributes
            (for example a ``pd.Timestamp``).

    Returns:
        A ``pd.Timestamp`` at midnight of the corrected date. Any time-of-day
        component of *x* is discarded.
    """
    year = x.year - 100 if x.year > 2000 else x.year
    # pd.datetime was removed in pandas 2.0. pd.Timestamp subclasses
    # datetime.datetime, so callers receive a compatible object.
    return pd.Timestamp(year=year, month=x.month, day=x.day)

# Import the wind.txt file as a pandas DataFrame.
# sep=r'\s+' is the documented equivalent of the deprecated
# delim_whitespace=True keyword.
data = pd.read_csv('wind.txt', sep=r'\s+')

# Expand the two-digit year with a "20" prefix so the date can be assembled;
# fix_year (applied below) shifts the resulting 20xx dates back a century.
data["Yr"] = "20" + data['Yr'].astype(str)
data = data.rename(columns={'Yr': 'year', 'Mo': 'month', 'Dy': 'day'})
# to_datetime builds one date per row from the year/month/day columns.
data.index = pd.to_datetime(data[['year', 'month', 'day']])
data = data.drop(['year', 'month', 'day'], axis=1)


# Apply the function to the index
data.index = data.index.map(fix_year)


# Tally the NaN entries column by column, giving one count per location.
missing_values = data.isnull().sum(axis=0)
print(missing_values)


# Number of valid (non-NaN) readings in the whole table.
total_values = data.notnull().sum().sum()
print(total_values)


# Calculate the mean windspeed over all locations and times.
# Averaging the per-location means would weight every location equally even
# though each one is missing a different number of readings. Dividing the
# grand total by the number of valid readings weights every observation
# equally (sum() and count() both skip NaN).
mean_windspeed = data.sum().sum() / data.count().sum()
print(mean_windspeed)

# Per-location summary: one row per location, one column per statistic
# (min, max, mean, std), each computed down the column.
loc_stats = pd.DataFrame(
    {stat: getattr(data, stat)() for stat in ('min', 'max', 'mean', 'std')}
)
print(loc_stats)


# Per-day summary: one row per date, one column per statistic
# (min, max, mean, std), each computed across the locations (axis=1).
day_stats = pd.DataFrame(
    {
        stat: getattr(data, stat)(axis=1)
        for stat in ('min', 'max', 'mean', 'std')
    }
)
print(day_stats)


# Find the average windspeed in January for each location. Treat January 1961
# and January 1962 both as January.
# The mean is taken directly over every January day. Averaging per-month
# means first would weight each year's January equally regardless of how many
# readings it is missing, and would create empty bins for all other months.
january_data = data.loc[data.index.month == 1]
january_mean_windspeeds = january_data.loc[:, 'RPT':'MAL'].mean()
print(january_mean_windspeeds)


# Downsample the record to a yearly frequency for each location.
# YearBegin() labels each bin with 1 January, the same bins the 'AS' string
# alias produced; the offset object avoids that deprecated alias.
yearly_data = data.resample(pd.offsets.YearBegin()).mean()
print(yearly_data)

# Downsample the record to a monthly frequency for each location.
# MonthEnd() labels each bin with the last day of the month, the same bins
# the 'M' string alias produced; the offset object avoids that deprecated
# alias and works on both older and newer pandas.
monthly_data = data.loc[:, 'RPT':'MAL'].resample(pd.offsets.MonthEnd()).mean()
print(monthly_data)


# Weekly mean windspeed for each location column from RPT through MAL.
weekly_data = (
    data.loc[:, 'RPT':'MAL']
    .resample('W')
    .mean()
)
print(weekly_data)

# Calculate the min, max and mean windspeeds and standard deviations of the
# windspeeds for each location over each of the first 52 weeks.
# Week 1 starts on Monday 1961-01-02, so week 52 ends 52 * 7 - 1 days later,
# on Sunday 1961-12-31. Ending the slice on 1961-12-25 (the first day of
# week 52) would leave the last weekly bin with a single day of data.
week1_start = pd.Timestamp('1961-01-02')
week52_end = week1_start + pd.Timedelta(weeks=52) - pd.Timedelta(days=1)
# 'W' bins end on Sunday, which lines up with the Monday start above.
week_data = (
    data.loc[week1_start:week52_end]
    .resample('W')
    .agg(['min', 'max', 'mean', 'std'])
)
print(week_data)