# Data Preprocessing for Climate Data 


In [2]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [3]:
%matplotlib inline

# Reading Dataset
### Dates which are present in the vegetable dataset those only consider for processing

In [4]:
climate = pd.read_csv('Ahmedabad_Climate.csv')
vegetables = pd.read_csv('Ahmedabad_Paddy.csv')
climate.head()
len(vegetables)

215

# String is converted into Required Format 

In [5]:
climate['Date Format'] = climate['Year'].map(str) + "-" + climate['Month'].astype(str) + "-" + climate['Date'].astype(str)
climate['Date Format'] = pd.to_datetime(climate['Date Format'])
climate.head()

Unnamed: 0,Year,Month,Date,Temp_Max,Temp_Avg,Temp_Min,Dew_Max,Dew_Avg,Dew_Min,Humid_Max,Humid_Avg,Humid_Min,Wind_Max,Wind_Avg,Wind_Min,Pressure_Max,Pressure_Avg,Pressure_Min,Precipitation_Total,Date Format
0,2019,Jan,1,86,65.4,48,45,40.8,36,71,45.2,17,6,1.1,0,30.0,29.9,29.8,0,2019-01-01
1,2019,Jan,2,84,65.6,48,46,41.1,32,82,46.4,15,7,1.6,0,29.9,29.9,29.8,0,2019-01-02
2,2019,Jan,3,84,66.7,52,45,39.0,32,63,40.1,15,8,2.8,0,30.0,29.9,29.9,0,2019-01-03
3,2019,Jan,4,84,66.6,54,46,43.2,39,72,45.6,20,6,2.2,0,29.9,29.8,29.8,0,2019-01-04
4,2019,Jan,5,84,67.8,52,52,45.3,39,77,48.6,20,6,1.8,0,29.9,29.8,29.7,0,2019-01-05


In [6]:
vegetables.head()
vegetables.shape
climate.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 789 entries, 0 to 788
Data columns (total 20 columns):
Year                   789 non-null int64
Month                  789 non-null object
Date                   789 non-null int64
Temp_Max               789 non-null int64
Temp_Avg               789 non-null float64
Temp_Min               789 non-null int64
Dew_Max                789 non-null int64
Dew_Avg                789 non-null float64
Dew_Min                789 non-null int64
Humid_Max              789 non-null int64
Humid_Avg              789 non-null float64
Humid_Min              789 non-null int64
Wind_Max               789 non-null int64
Wind_Avg               789 non-null float64
Wind_Min               789 non-null int64
Pressure_Max           789 non-null float64
Pressure_Avg           789 non-null float64
Pressure_Min           789 non-null float64
Precipitation_Total    789 non-null int64
Date Format            789 non-null datetime64[ns]
dtypes: datetime64[ns](1), floa

In [7]:
vegetables = vegetables[vegetables['District Name'] == "Ahmedabad"]
vegetables = vegetables[vegetables['Market Name'] == "Dhandhuka"]
vegetables = vegetables[vegetables['Variety'] == "Shanker 6 (B) 30mm FIne"]
vegetables.head()

Unnamed: 0,Sl no.,District Name,Market Name,Commodity,Variety,Grade,Min Price (Rs./Quintal),Max Price (Rs./Quintal),Modal Price (Rs./Quintal),Price Date
1,2,Ahmedabad,Dholka(Koth),Paddy(Dhan)(Common),Other,FAQ,1550,1755,1750,25-Jun-19
2,3,Ahmedabad,Dholka(Koth),Paddy(Dhan)(Common),Other,FAQ,1550,1760,1750,22-Jun-19
3,4,Ahmedabad,Dholka(Koth),Paddy(Dhan)(Common),Other,FAQ,1300,1755,1750,19-Jun-19
4,5,Ahmedabad,Dholka(Koth),Paddy(Dhan)(Common),Other,FAQ,1300,1770,1750,17-Jun-19
5,6,Ahmedabad,Dholka(Koth),Paddy(Dhan)(Common),Other,FAQ,1250,1755,1750,14-Jun-19


# Function to generate dates 

In [8]:
pd.date_range(end='1/1/2018', periods= 2)
#pd.date_range(start="24/1/2018",freq='4M', periods= 2)

DatetimeIndex(['2017-12-31', '2018-01-01'], dtype='datetime64[ns]', freq='D')

In [9]:
vegetables['Price Date'] = pd.to_datetime(vegetables['Price Date'])
vegetables = vegetables.iloc[::-1]
#vegetables.head(10)

# Logic for averaging all parameters

In [10]:
final = []
days_required = 210.0
for row1 in vegetables['Price Date']:
    Temp_Avg, Dew_Avg,Humid_Avg,Wind_Avg,Pressure_Avg, Precipitation_Total = 0.0,0.0,0.0,0.0,0.0,0.0
    Temp_Max, Temp_Min, Dew_Max, Dew_Min, Humid_Max, Humid_Min,Wind_Max, Wind_Min, Pressure_Max, Pressure_Min = 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
    for row2 in pd.date_range(end=row1, periods= days_required):
        try:
            temp = climate[climate['Date Format'] == row2]
            
            Temp_Max += float(temp['Temp_Max'])
            Temp_Avg += float(temp['Temp_Avg'])
            Temp_Min += float(temp['Temp_Min'])
        
            Dew_Max += float(temp['Dew_Max'])
            Dew_Avg += float(temp['Dew_Avg'])
            Dew_Min += float(temp['Dew_Min'])
        
            Humid_Max += float(temp['Humid_Max'])
            Humid_Avg += float(temp['Humid_Avg'])
            Humid_Min += float(temp['Humid_Min'])
        
            Wind_Max += float(temp['Wind_Max'])
            Wind_Avg += float(temp['Wind_Avg'])
            Wind_Min += float(temp['Wind_Min'])
        
            Pressure_Avg += float(temp['Pressure_Avg'])
        
            Precipitation_Total += float(temp['Precipitation_Total'])
        except:
            continue
        
    final.append([row1,Temp_Max/days_required,Temp_Avg / days_required, Temp_Min / days_required,Dew_Min/ days_required,Dew_Avg/ days_required, Dew_Min/ days_required, Humid_Max/ days_required,Humid_Avg/ days_required, Humid_Min/ days_required, Wind_Max/ days_required, Wind_Avg/ days_required, Wind_Min/ days_required, Pressure_Max/ days_required, Pressure_Avg/ days_required, Pressure_Min/ days_required, Precipitation_Total/ days_required])

In [11]:
final

[[Timestamp('2019-01-01 00:00:00'),
  93.82380952380953,
  82.58380952380952,
  70.42857142857143,
  59.34761904761905,
  65.54380952380951,
  59.34761904761905,
  79.23809523809524,
  59.9366666666667,
  39.60952380952381,
  10.9,
  5.123333333333331,
  1.5714285714285714,
  0.0,
  29.534761904761865,
  0.0,
  0.0],
 [Timestamp('2019-01-02 00:00:00'),
  93.68571428571428,
  82.42190476190476,
  70.23809523809524,
  59.2,
  65.3942857142857,
  59.2,
  79.31428571428572,
  59.94476190476195,
  39.58095238095238,
  10.861904761904762,
  5.088571428571427,
  1.542857142857143,
  0.0,
  29.53714285714282,
  0.0,
  0.0],
 [Timestamp('2019-01-03 00:00:00'),
  93.54761904761905,
  82.27619047619046,
  70.06666666666666,
  59.03809523809524,
  65.22571428571428,
  59.03809523809524,
  79.28095238095239,
  59.89523809523814,
  39.53333333333333,
  10.833333333333334,
  5.0557142857142825,
  1.5142857142857142,
  0.0,
  29.551904761904723,
  0.0,
  0.0],
 [Timestamp('2019-01-05 00:00:00'),
  93.

In [12]:
len(final)

76

In [13]:
final

[[Timestamp('2019-01-01 00:00:00'),
  93.82380952380953,
  82.58380952380952,
  70.42857142857143,
  59.34761904761905,
  65.54380952380951,
  59.34761904761905,
  79.23809523809524,
  59.9366666666667,
  39.60952380952381,
  10.9,
  5.123333333333331,
  1.5714285714285714,
  0.0,
  29.534761904761865,
  0.0,
  0.0],
 [Timestamp('2019-01-02 00:00:00'),
  93.68571428571428,
  82.42190476190476,
  70.23809523809524,
  59.2,
  65.3942857142857,
  59.2,
  79.31428571428572,
  59.94476190476195,
  39.58095238095238,
  10.861904761904762,
  5.088571428571427,
  1.542857142857143,
  0.0,
  29.53714285714282,
  0.0,
  0.0],
 [Timestamp('2019-01-03 00:00:00'),
  93.54761904761905,
  82.27619047619046,
  70.06666666666666,
  59.03809523809524,
  65.22571428571428,
  59.03809523809524,
  79.28095238095239,
  59.89523809523814,
  39.53333333333333,
  10.833333333333334,
  5.0557142857142825,
  1.5142857142857142,
  0.0,
  29.551904761904723,
  0.0,
  0.0],
 [Timestamp('2019-01-05 00:00:00'),
  93.

In [14]:
df = pd.DataFrame(data = final, columns = ['Date','Temp_Max', 'Temp_Avg', 'Temp_Min', 'Dew_Max', 'Dew_Avg', 'Dew_Min', 'Humid_Max', 'Humid_Avg','Humid_Min','Wind_Max','Wind_Avg','Wind_Min','Pressure_Max', 'Pressure_Avg','Pressure_Min', 'Precipitation_Total'])

# CSV File is generated here 

In [15]:
df.to_csv('climate_Ahemdabad_210_1.csv') 

In [None]:
#for row2 in pd.date_range(end=row1, periods= days_required):
        #temp = climate[climate['Date Format'] == row2]
        #print(row2)
        '''Temp_Avg += int(temp['Temp_Avg'])
        Dew_Avg += int(temp['Dew_Avg'])
        Humid_Avg += int(temp['Humid_Avg'])
        Wind_Avg += int(temp['Wind_Avg']) 
        Pressure_Avg += int(temp['Pressure_Avg'])
        Precipitation_Total += int(temp['Precipitation_Total'])
    final.append([row1,Temp_Avg / month_required, Dew_Avg/month_required, Humid_Avg/month_required, Wind_Avg/month_required, Pressure_Avg/month_required,Precipitation_Total/month_required])'''
'''for row1 in vegetables['Price Date']:
    Temp_Avg, Dew_Avg,Humid_Avg,Wind_Avg,Pressure_Avg, Precipitation_Total = 0,0,0,0,0,0
    temp = climate[climate['Date Format'] == row1]
    print(temp)'''
'''
Temp_Max += temp['Temp_Max'].astype(float)
        Temp_Avg += temp['Temp_Avg'].astype(float)
        Temp_Min += temp['Temp_Min'].astype(float)
        
        Dew_Max += temp['Dew_Max'].astype(float)
        Dew_Avg += temp['Dew_Avg'].astype(float)
        Dew_Min += temp['Dew_Min'].astype(float)
        
        Humid_Max += temp['Humid_Max'].astype(float)
        Humid_Avg += temp['Humid_Avg'].astype(float)
        Humid_Min += temp['Humid_Min'].astype(float)
        
        Wind_Max += temp['Wind_Max'].astype(float)
        Wind_Avg += temp['Wind_Avg'].astype(float)
        Wind_Min += temp['Wind_Min'].astype(float)
        
        Pressure_Avg += temp['Pressure_Avg'].astype(float)
        
        Precipitation_Total += temp['Precipitation_Total'].astype(float)
    final.append([row1,Temp_Max/days_required,Temp_Avg / days_required, Temp_Min / days_required,Dew_Min/ days_required,Dew_Avg/ days_required, Dew_Min/ days_required, Humid_Max/ days_required,Humid_Avg/ days_required, Humid_Min/ days_required, Wind_Max/ days_required, Wind_Avg/ days_required, Wind_Min/ days_required, Pressure_Max/ days_required, Pressure_Avg/ days_required, Pressure_Min/ days_required, Precipitation_Total/ days_required])
    
Temp_Max += int(temp['Temp_Max'])
        Temp_Avg += int(temp['Temp_Avg'])
        Temp_Min += int(temp['Temp_Min'])
        
        Dew_Max += int(temp['Dew_Max'])
        Dew_Avg += int(temp['Dew_Avg'])
        Dew_Min += int(temp['Dew_Min'])
        
        Humid_Max += int(temp['Humid_Max'])
        Humid_Avg += int(temp['Humid_Avg'])
        Humid_Min += int(temp['Humid_Min'])
        
        Wind_Max += int(temp['Wind_Max'])
        Wind_Avg += int(temp['Wind_Avg'])
        Wind_Min += int(temp['Wind_Min'])
        
        Pressure_Avg += int(temp['Pressure_Avg'])
        
        Precipitation_Total += int(temp['Precipitation_Total'])
'''