# Data Preprocessing for Climate Data 


In [10]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [11]:
%matplotlib inline

# Reading Dataset
### Dates which are present in the vegetable dataset those only consider for processing

In [12]:
climate = pd.read_csv('Ahmedabad_Climate.csv')
vegetables = pd.read_csv('Ahmedabad_Cotton.csv')
climate.head()
len(vegetables)

244

# String is converted into Required Format 

In [13]:
climate['Date Format'] = climate['Year'].map(str) + "-" + climate['Month'].astype(str) + "-" + climate['Date'].astype(str)
climate['Date Format'] = pd.to_datetime(climate['Date Format'])
climate.head()

Unnamed: 0,Year,Month,Date,Temp_Max,Temp_Avg,Temp_Min,Dew_Max,Dew_Avg,Dew_Min,Humid_Max,Humid_Avg,Humid_Min,Wind_Max,Wind_Avg,Wind_Min,Pressure_Max,Pressure_Avg,Pressure_Min,Precipitation_Total,Date Format
0,2019,Jan,1,86,65.4,48,45,40.8,36,71,45.2,17,6,1.1,0,30.0,29.9,29.8,0,2019-01-01
1,2019,Jan,2,84,65.6,48,46,41.1,32,82,46.4,15,7,1.6,0,29.9,29.9,29.8,0,2019-01-02
2,2019,Jan,3,84,66.7,52,45,39.0,32,63,40.1,15,8,2.8,0,30.0,29.9,29.9,0,2019-01-03
3,2019,Jan,4,84,66.6,54,46,43.2,39,72,45.6,20,6,2.2,0,29.9,29.8,29.8,0,2019-01-04
4,2019,Jan,5,84,67.8,52,52,45.3,39,77,48.6,20,6,1.8,0,29.9,29.8,29.7,0,2019-01-05


In [14]:
print(vegetables.head())
vegetables.shape
climate.info()

   Sl no. District Name Market Name Commodity                  Variety Grade  \
0       1     Ahmedabad   Dhandhuka    Cotton  Shanker 6 (B) 30mm FIne   FAQ   
1       2     Ahmedabad   Dhandhuka    Cotton  Shanker 6 (B) 30mm FIne   FAQ   
2       3     Ahmedabad   Dhandhuka    Cotton  Shanker 6 (B) 30mm FIne   FAQ   
3       4     Ahmedabad   Dhandhuka    Cotton  Shanker 6 (B) 30mm FIne   FAQ   
4       5     Ahmedabad   Dhandhuka    Cotton  Shanker 6 (B) 30mm FIne   FAQ   

   Min Price (Rs./Quintal)  Max Price (Rs./Quintal)  \
0                     5000                     5625   
1                     4900                     5625   
2                     4550                     5555   
3                     4500                     5555   
4                     4000                     5500   

   Modal Price (Rs./Quintal) Price Date  
0                       5310  31-Dec-19  
1                       5260  30-Dec-19  
2                       5025  28-Dec-19  
3                   

In [15]:
vegetables = vegetables[vegetables['District Name'] == "Ahmedabad"]
vegetables = vegetables[vegetables['Market Name'] == "Dhandhuka"]
vegetables = vegetables[vegetables['Commodity'] == "Cotton"]
#vegetables = vegetables[vegetables['Variety'] == "Paddy(Dhan)(Common)"]
vegetables.head()

Unnamed: 0,Sl no.,District Name,Market Name,Commodity,Variety,Grade,Min Price (Rs./Quintal),Max Price (Rs./Quintal),Modal Price (Rs./Quintal),Price Date
0,1,Ahmedabad,Dhandhuka,Cotton,Shanker 6 (B) 30mm FIne,FAQ,5000,5625,5310,31-Dec-19
1,2,Ahmedabad,Dhandhuka,Cotton,Shanker 6 (B) 30mm FIne,FAQ,4900,5625,5260,30-Dec-19
2,3,Ahmedabad,Dhandhuka,Cotton,Shanker 6 (B) 30mm FIne,FAQ,4550,5555,5025,28-Dec-19
3,4,Ahmedabad,Dhandhuka,Cotton,Shanker 6 (B) 30mm FIne,FAQ,4500,5555,5030,27-Dec-19
4,5,Ahmedabad,Dhandhuka,Cotton,Shanker 6 (B) 30mm FIne,FAQ,4000,5500,4750,26-Dec-19


# Function to generate dates 

In [16]:
pd.date_range(end='1/1/2018', periods= 2)
#pd.date_range(start="24/1/2018",freq='4M', periods= 2)

DatetimeIndex(['2017-12-31', '2018-01-01'], dtype='datetime64[ns]', freq='D')

In [17]:
vegetables['Price Date'] = pd.to_datetime(vegetables['Price Date'])
vegetables = vegetables.iloc[::-1]
#vegetables.head(10)

# Logic for averaging all parameters

In [18]:
final = []
days_required = 160.0
for row1 in vegetables['Price Date']:
    Temp_Avg, Dew_Avg,Humid_Avg,Wind_Avg,Pressure_Avg, Precipitation_Total = 0.0,0.0,0.0,0.0,0.0,0.0
    Temp_Max, Temp_Min, Dew_Max, Dew_Min, Humid_Max, Humid_Min,Wind_Max, Wind_Min, Pressure_Max, Pressure_Min = 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
    for row2 in pd.date_range(end=row1, periods= days_required):
        try:
            temp = climate[climate['Date Format'] == row2]
            
            Temp_Max += float(temp['Temp_Max'])
            Temp_Avg += float(temp['Temp_Avg'])
            Temp_Min += float(temp['Temp_Min'])
        
            Dew_Max += float(temp['Dew_Max'])
            Dew_Avg += float(temp['Dew_Avg'])
            Dew_Min += float(temp['Dew_Min'])
        
            Humid_Max += float(temp['Humid_Max'])
            Humid_Avg += float(temp['Humid_Avg'])
            Humid_Min += float(temp['Humid_Min'])
        
            Wind_Max += float(temp['Wind_Max'])
            Wind_Avg += float(temp['Wind_Avg'])
            Wind_Min += float(temp['Wind_Min'])
        
            Pressure_Avg += float(temp['Pressure_Avg'])
        
            Precipitation_Total += float(temp['Precipitation_Total'])
        except:
            continue
        
    final.append([row1,Temp_Max/days_required,Temp_Avg / days_required, Temp_Min / days_required,Dew_Min/ days_required,Dew_Avg/ days_required, Dew_Min/ days_required, Humid_Max/ days_required,Humid_Avg/ days_required, Humid_Min/ days_required, Wind_Max/ days_required, Wind_Avg/ days_required, Wind_Min/ days_required, Pressure_Max/ days_required, Pressure_Avg/ days_required, Pressure_Min/ days_required, Precipitation_Total/ days_required])

In [19]:
final

[[Timestamp('2019-01-01 00:00:00'),
  92.21875,
  80.28687499999997,
  67.30625,
  56.0125,
  62.21312499999999,
  56.0125,
  78.63125,
  57.805625000000006,
  37.2875,
  9.8375,
  4.271874999999996,
  1.18125,
  0.0,
  29.633749999999935,
  0.0,
  0.0],
 [Timestamp('2019-01-02 00:00:00'),
  92.18125,
  80.16937499999997,
  67.1,
  55.74375,
  61.988749999999996,
  55.74375,
  78.5875,
  57.60312500000002,
  36.94375,
  9.79375,
  4.2187499999999964,
  1.14375,
  0.0,
  29.636874999999936,
  0.0,
  0.0],
 [Timestamp('2019-01-03 00:00:00'),
  92.14375,
  80.05624999999996,
  66.91875,
  55.475,
  61.748749999999994,
  55.475,
  78.45625,
  57.36187500000002,
  36.6,
  9.76875,
  4.183124999999997,
  1.10625,
  0.0,
  29.639999999999937,
  0.0,
  0.0],
 [Timestamp('2019-01-04 00:00:00'),
  92.13125,
  79.94812499999998,
  66.74375,
  55.2625,
  61.544375,
  55.2625,
  78.38125,
  57.16812500000002,
  36.2875,
  9.74375,
  4.1424999999999965,
  1.0625,
  0.0,
  29.642499999999938,
  0.0,


In [58]:
len(final)

138

In [13]:
final

[[Timestamp('2019-01-01 00:00:00'),
  93.82380952380953,
  82.58380952380952,
  70.42857142857143,
  59.34761904761905,
  65.54380952380951,
  59.34761904761905,
  79.23809523809524,
  59.9366666666667,
  39.60952380952381,
  10.9,
  5.123333333333331,
  1.5714285714285714,
  0.0,
  29.534761904761865,
  0.0,
  0.0],
 [Timestamp('2019-01-02 00:00:00'),
  93.68571428571428,
  82.42190476190476,
  70.23809523809524,
  59.2,
  65.3942857142857,
  59.2,
  79.31428571428572,
  59.94476190476195,
  39.58095238095238,
  10.861904761904762,
  5.088571428571427,
  1.542857142857143,
  0.0,
  29.53714285714282,
  0.0,
  0.0],
 [Timestamp('2019-01-03 00:00:00'),
  93.54761904761905,
  82.27619047619046,
  70.06666666666666,
  59.03809523809524,
  65.22571428571428,
  59.03809523809524,
  79.28095238095239,
  59.89523809523814,
  39.53333333333333,
  10.833333333333334,
  5.0557142857142825,
  1.5142857142857142,
  0.0,
  29.551904761904723,
  0.0,
  0.0],
 [Timestamp('2019-01-05 00:00:00'),
  93.

In [20]:
df = pd.DataFrame(data = final, columns = ['Date','Temp_Max', 'Temp_Avg', 'Temp_Min', 'Dew_Max', 'Dew_Avg', 'Dew_Min', 'Humid_Max', 'Humid_Avg','Humid_Min','Wind_Max','Wind_Avg','Wind_Min','Pressure_Max', 'Pressure_Avg','Pressure_Min', 'Precipitation_Total'])

# CSV File is generated here 

In [21]:
df.to_csv('climate_Ahemdabad_Cotton_160.csv') 

In [None]:
#for row2 in pd.date_range(end=row1, periods= days_required):
        #temp = climate[climate['Date Format'] == row2]
        #print(row2)
        '''Temp_Avg += int(temp['Temp_Avg'])
        Dew_Avg += int(temp['Dew_Avg'])
        Humid_Avg += int(temp['Humid_Avg'])
        Wind_Avg += int(temp['Wind_Avg']) 
        Pressure_Avg += int(temp['Pressure_Avg'])
        Precipitation_Total += int(temp['Precipitation_Total'])
    final.append([row1,Temp_Avg / month_required, Dew_Avg/month_required, Humid_Avg/month_required, Wind_Avg/month_required, Pressure_Avg/month_required,Precipitation_Total/month_required])'''
'''for row1 in vegetables['Price Date']:
    Temp_Avg, Dew_Avg,Humid_Avg,Wind_Avg,Pressure_Avg, Precipitation_Total = 0,0,0,0,0,0
    temp = climate[climate['Date Format'] == row1]
    print(temp)'''
'''
Temp_Max += temp['Temp_Max'].astype(float)
        Temp_Avg += temp['Temp_Avg'].astype(float)
        Temp_Min += temp['Temp_Min'].astype(float)
        
        Dew_Max += temp['Dew_Max'].astype(float)
        Dew_Avg += temp['Dew_Avg'].astype(float)
        Dew_Min += temp['Dew_Min'].astype(float)
        
        Humid_Max += temp['Humid_Max'].astype(float)
        Humid_Avg += temp['Humid_Avg'].astype(float)
        Humid_Min += temp['Humid_Min'].astype(float)
        
        Wind_Max += temp['Wind_Max'].astype(float)
        Wind_Avg += temp['Wind_Avg'].astype(float)
        Wind_Min += temp['Wind_Min'].astype(float)
        
        Pressure_Avg += temp['Pressure_Avg'].astype(float)
        
        Precipitation_Total += temp['Precipitation_Total'].astype(float)
    final.append([row1,Temp_Max/days_required,Temp_Avg / days_required, Temp_Min / days_required,Dew_Min/ days_required,Dew_Avg/ days_required, Dew_Min/ days_required, Humid_Max/ days_required,Humid_Avg/ days_required, Humid_Min/ days_required, Wind_Max/ days_required, Wind_Avg/ days_required, Wind_Min/ days_required, Pressure_Max/ days_required, Pressure_Avg/ days_required, Pressure_Min/ days_required, Precipitation_Total/ days_required])
    
Temp_Max += int(temp['Temp_Max'])
        Temp_Avg += int(temp['Temp_Avg'])
        Temp_Min += int(temp['Temp_Min'])
        
        Dew_Max += int(temp['Dew_Max'])
        Dew_Avg += int(temp['Dew_Avg'])
        Dew_Min += int(temp['Dew_Min'])
        
        Humid_Max += int(temp['Humid_Max'])
        Humid_Avg += int(temp['Humid_Avg'])
        Humid_Min += int(temp['Humid_Min'])
        
        Wind_Max += int(temp['Wind_Max'])
        Wind_Avg += int(temp['Wind_Avg'])
        Wind_Min += int(temp['Wind_Min'])
        
        Pressure_Avg += int(temp['Pressure_Avg'])
        
        Precipitation_Total += int(temp['Precipitation_Total'])
'''