# Numpy & Pandas: Analyzing Weather Data Set

### Basic Imports

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import mode

# Path of the weather CSV file, relative to the current working directory.
path = 'weather_2012.csv'
# Read the CSV into the module-level dataframe used by the example calls below.
weather = pd.read_csv(path)

### Code for categorical variable
* Extract names of categorical column     
* This function accepts a dataframe and returns categorical list, containing the names of categorical columns(categorical_var).
    
* Keyword arguments: df - Pandas dataframe from which the columns name will be extracted
        
* Returns: categorical_var - List of categorical features

In [2]:
def categorial(df):
    """Return the names of the categorical (object-dtype) columns of ``df``.

    Args:
        df: Pandas dataframe from which the column names are extracted.

    Returns:
        A pandas ``Index`` with the names of the columns whose dtype is
        ``object``; empty when there are none.
    """
    # Inspect the dataframe that was passed in rather than the module-level
    # `weather`, so the function works for any dataframe.
    return df.select_dtypes(include='object').columns


categorial(weather)

Index(['Date/Time', 'Weather'], dtype='object')

### Code for numerical variable
* Extract names of numerical column
    
* This function accepts a dataframe and returns numerical list,
    containing the names of numerical columns(numerical_var).
        
* Keyword arguments:
    df - Pandas dataframe from which the columns name will be extracted
    
* Returns:
    numerical_var - List of numerical features

In [3]:
def numerical(df):
    """Return the names of the numerical columns of ``df``.

    Args:
        df: Pandas dataframe from which the column names are extracted.

    Returns:
        A pandas ``Index`` with the names of the columns that have a numeric
        dtype; empty when there are none.
    """
    # Inspect the dataframe that was passed in rather than the module-level
    # `weather`, so the function works for any dataframe.
    return df.select_dtypes(include='number').columns


numerical(weather)

Index(['Temp (C)', 'Dew Point Temp (C)', 'Rel Hum (%)', 'Wind Spd (km/h)',
       'Visibility (km)', 'Stn Press (kPa)'],
      dtype='object')

### Code to check distribution of variable
* Check distribution of variable
    
* This function accepts a dataframe, a column (feature) and a value, and returns the number of times
    that value occurs in the column (value_counts).
    
* Keyword arguments:
    df - Pandas dataframe
    col - Feature of the dataframe
    val - value of the feature
    
* Returns:
    value_counts - Value count of the feature 

In [4]:
def clear(df, col, val):
    """Count how many rows of column ``col`` are equal to ``val``.

    Args:
        df: Pandas dataframe.
        col: Name of the column (feature) to inspect.
        val: Value whose occurrences are counted.

    Returns:
        The number of rows where ``df[col] == val``; 0 when the value does
        not occur in the column.
    """
    # Comparing directly avoids building the full frequency table and, unlike
    # indexing into value_counts(), does not raise KeyError for absent values.
    return (df[col] == val).sum()


clear(weather, 'Weather', 'Cloudy')

1728

### Code to check instances based on the condition
* Instances based on the condition
    
* This function accepts a dataframe, 2 columns(feature) and 2 values which returns the dataframe
    based on the condition.
    
* Keyword arguments:
    df - Pandas dataframe which has the data.
    col1 - First feature of the dataframe on which you want to apply the filter
    val1 - Value to be filtered on the first feature
    col2 - Second feature of the dataframe on which you want to apply the filter
    val2 - Value to be filtered on second feature
    
* Returns:
    instance - Generated dataframe

In [5]:
def instances_based_condition(df, col1, val1, col2, val2):
    """Select the rows of ``df`` that satisfy two equality conditions.

    Args:
        df: Pandas dataframe which has the data.
        col1: First column to filter on.
        val1: Value that ``col1`` must equal.
        col2: Second column to filter on.
        val2: Value that ``col2`` must equal.

    Returns:
        Dataframe holding only the rows where both conditions are true.
    """
    first_matches = df[col1] == val1
    second_matches = df[col2] == val2
    # A row is kept only when it passes both filters.
    return df.loc[first_matches & second_matches]


instances_based_condition(weather, 'Wind Spd (km/h)', 35, 'Visibility (km)',
                          25)

Unnamed: 0,Date/Time,Temp (C),Dew Point Temp (C),Rel Hum (%),Wind Spd (km/h),Visibility (km),Stn Press (kPa),Weather
24,2012-01-02 00:00:00,5.2,1.5,77,35,25.0,99.26,Rain Showers
217,2012-01-10 01:00:00,2.3,-3.4,66,35,25.0,100.28,Snow
582,2012-01-25 06:00:00,-2.8,-9.0,62,35,25.0,101.66,Mostly Cloudy
868,2012-02-06 04:00:00,0.8,-3.7,72,35,25.0,100.68,Snow
1339,2012-02-25 19:00:00,-4.7,-10.5,64,35,25.0,100.04,Cloudy
2569,2012-04-17 01:00:00,18.9,9.6,55,35,25.0,100.42,Mainly Clear
2709,2012-04-22 21:00:00,4.3,-1.0,68,35,25.0,101.07,Rain
5833,2012-08-31 01:00:00,23.5,14.1,56,35,25.0,100.51,Mainly Clear
5834,2012-08-31 02:00:00,23.2,13.9,56,35,25.0,100.5,Mainly Clear
6290,2012-09-19 02:00:00,9.7,7.6,87,35,25.0,100.1,Cloudy


### Code to calculate different aggregated values according to month
* Aggregate values according to month
    
* This function accepts a dataframe, 2 columns (features) and an aggregation function (agg), and returns the pivot 
    table with the aggregated value of the feature, indexed by month.
     
* Keyword arguments:
    df - Pandas dataframe which has the data.
    date_col - Date feature of the dataframe on which you want to apply to_datetime conversion
    agg_col - Feature of the dataframe on which values will be aggregated.
    agg - The function to be used for aggregating the df (eg. 'mean', 'min', 'max').
    
* Returns:
    aggregated_value - Generated pivot table

In [6]:
# Parse the 'Date/Time' column into datetime values so that the `.dt`
# accessor can be used on it.
weather['Date/Time'] = pd.to_datetime(weather['Date/Time'])

In [7]:
def agg_values_ina_month(df, date_col, agg_col, agg):
    """Aggregate one column per calendar month.

    Args:
        df: Pandas dataframe which has the data.
        date_col: Name of the date column; it may hold datetimes or values
            that ``pd.to_datetime`` can parse.
        agg_col: Name of the column whose values are aggregated.
        agg: Aggregation to apply (e.g. 'mean', 'min', 'max', 'count').

    Returns:
        Pivot table of ``agg_col`` aggregated with ``agg``, indexed by the
        month number taken from ``date_col``.
    """
    # Convert here instead of relying on the caller having done so first;
    # to_datetime returns a new Series, so `df` itself is left unmodified.
    months = pd.to_datetime(df[date_col]).dt.month
    return df.pivot_table(agg_col, index=months, aggfunc=agg)


agg_values_ina_month(weather, 'Date/Time', 'Temp (C)', 'count')

Unnamed: 0_level_0,Temp (C)
Date/Time,Unnamed: 1_level_1
1,744
2,696
3,744
4,720
5,744
6,720
7,744
8,744
9,720
10,744


### Code to group values based on the feature
* Aggregate values by grouping
    
* This function accepts a dataframe, 1 column (feature) and an aggregation function (agg1), and groups the 
    dataframe by that column.
   
* Keyword arguments:
    df - Pandas dataframe which has the data.
    col1 - Feature of the dataframe on which values will be aggregated.
    agg1 - The function to be used for aggregating the df (eg. 'mean', 'min', 'max').
    
* Returns:
    grouping - Dataframe with all columns on which it is grouped on.

In [11]:
def group_values(df, col1, agg1):
    """Group ``df`` by one column and aggregate every group.

    Args:
        df: Pandas dataframe which has the data.
        col1: Name of the column to group by.
        agg1: Aggregation to apply to each group (e.g. 'mean', 'min', 'max').

    Returns:
        Dataframe indexed by the distinct values of ``col1`` holding the
        aggregated values.
    """
    grouped = df.groupby(col1)
    return grouped.aggregate(agg1)


group_values(weather, 'Weather', 'mean')

Unnamed: 0_level_0,Temp (C),Dew Point Temp (C),Rel Hum (%),Wind Spd (km/h),Visibility (km),Stn Press (kPa)
Weather,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Clear,6.825716,0.089367,64.497738,10.557315,30.153243,101.587443
Cloudy,7.970544,2.37581,69.592593,16.127315,26.625752,100.911441
Drizzle,7.353659,5.504878,88.243902,16.097561,17.931707,100.435366
"Drizzle,Fog",8.0675,7.03375,93.275,11.8625,5.2575,100.786625
"Drizzle,Ice Pellets,Fog",0.4,-0.7,92.0,20.0,4.0,100.79
"Drizzle,Snow",1.05,0.15,93.5,14.0,10.5,100.89
"Drizzle,Snow,Fog",0.693333,0.12,95.866667,15.533333,5.513333,99.281333
Fog,4.303333,3.159333,92.286667,7.946667,6.248,101.184067
Freezing Drizzle,-5.657143,-8.0,83.571429,16.571429,9.2,100.202857
"Freezing Drizzle,Fog",-2.533333,-4.183333,88.5,17.0,5.266667,100.441667


### Function for conversion 
* Convert temperatures from Celsius to Fahrenheit
    
* This function accepts a dataframe and 1 column (feature), and returns the dataframe with that column's values converted from 
    Celsius to Fahrenheit.
         
* Keyword arguments:
    df - Pandas dataframe which has the data.
    celsius - Temperature feature of the dataframe which you want to convert to Fahrenheit
    
* Returns:
    converted_temp - Generated dataframe with Fahrenheit temp.

In [None]:
def convert(df, celsius):
    """Convert a temperature column from Celsius to Fahrenheit in place.

    Args:
        df: Pandas dataframe which has the data.
        celsius: Name of the column holding Celsius temperatures.

    Returns:
        The same dataframe object, with the ``celsius`` column overwritten
        by the Fahrenheit values (the column keeps its original name).
    """
    # Vectorised arithmetic on the whole column instead of calling a Python
    # lambda once per row.
    df[celsius] = df[celsius] * 9 / 5 + 32
    return df


# NOTE: convert() overwrites the 'Temp (C)' column of `weather` in place, so
# running this cell again converts the already-converted values a second time.
convert(weather, 'Temp (C)')