# Data Preprocessing for Climate Data 


In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [2]:
%matplotlib inline

# Reading Dataset
### Dates which are present in the vegetable dataset those only consider for processing

In [12]:
climate = pd.read_csv('Climate_Nagpur.csv')
vegetables = pd.read_csv('Nagpur_Soyabean.csv')
climate.head()
len(vegetables)

287

# String is converted into Required Format 

In [4]:
climate['Date Format'] = climate['Year'].map(str) + "-" + climate['Month'].astype(str) + "-" + climate['Date'].astype(str)
climate['Date Format'] = pd.to_datetime(climate['Date Format'])
climate.head()

Unnamed: 0,Year,Month,Date,Temp_Max,Temp_Avg,Temp_Min,Dew_Max,Dew_Avg,Dew_Min,Humid_Max,Humid_Avg,Humid_Min,Wind_Max,Wind_Avg,Wind_Min,Pressure_Max,Pressure_Avg,Pressure_Min,Precipitation_Total,Date Format
0,2019,Jan,1,65,52.8,40,50,44.5,40,100,77.5,47,5,1.5,0,0,0,0,0.0,2019-01-01
1,2019,Jan,2,67,56.5,44,50,47.5,44,100,75.8,48,6,2.3,0,0,0,0,0.0,2019-01-02
2,2019,Jan,3,65,54.3,44,52,48.0,44,100,81.5,62,7,3.3,0,0,0,0,0.0,2019-01-03
3,2019,Jan,4,67,55.8,44,47,44.0,42,100,71.3,40,5,2.5,1,0,0,0,0.0,2019-01-04
4,2019,Jan,5,67,58.3,51,52,49.0,44,100,75.8,50,8,3.5,1,0,0,0,0.0,2019-01-05


In [5]:
vegetables.head()
vegetables.shape
climate.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 630 entries, 0 to 629
Data columns (total 20 columns):
Year                   630 non-null int64
Month                  630 non-null object
Date                   630 non-null int64
Temp_Max               630 non-null int64
Temp_Avg               630 non-null float64
Temp_Min               630 non-null int64
Dew_Max                630 non-null int64
Dew_Avg                630 non-null float64
Dew_Min                630 non-null int64
Humid_Max              630 non-null int64
Humid_Avg              630 non-null float64
Humid_Min              630 non-null int64
Wind_Max               630 non-null int64
Wind_Avg               630 non-null float64
Wind_Min               630 non-null int64
Pressure_Max           630 non-null int64
Pressure_Avg           630 non-null int64
Pressure_Min           630 non-null int64
Precipitation_Total    630 non-null float64
Date Format            630 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(

In [6]:
vegetables = vegetables[vegetables['District Name'] == "Nagpur"]
vegetables = vegetables[vegetables['Market Name'] == "Bhiwapur"]
vegetables = vegetables[vegetables['Commodity'] == "Cotton"]
vegetables.head()

Unnamed: 0,Sl no.,District Name,Market Name,Commodity,Variety,Grade,Min Price (Rs./Quintal),Max Price (Rs./Quintal),Modal Price (Rs./Quintal),Price Date
0,1,Agra,Agra,Potato,Desi,FAQ,1380,1550,1450,31-Dec-19
1,2,Agra,Agra,Potato,Desi,FAQ,1300,1450,1380,30-Dec-19
2,3,Agra,Agra,Potato,Desi,FAQ,1400,1550,1480,28-Dec-19
3,4,Agra,Agra,Potato,Desi,FAQ,1350,1550,1450,27-Dec-19
4,5,Agra,Agra,Potato,Desi,FAQ,1350,1550,1440,26-Dec-19


# Function to generate dates 

In [7]:
pd.date_range(end='1/1/2018', periods= 2)
#pd.date_range(start="24/1/2018",freq='4M', periods= 2)

DatetimeIndex(['2017-12-31', '2018-01-01'], dtype='datetime64[ns]', freq='D')

In [8]:
vegetables['Price Date'] = pd.to_datetime(vegetables['Price Date'])
vegetables = vegetables.iloc[::-1]
#vegetables.head(10)

# Logic for averaging all parameters

In [9]:
final = []
days_required = 210.0
for row1 in vegetables['Price Date']:
    Temp_Avg, Dew_Avg,Humid_Avg,Wind_Avg,Pressure_Avg, Precipitation_Total = 0.0,0.0,0.0,0.0,0.0,0.0
    Temp_Max, Temp_Min, Dew_Max, Dew_Min, Humid_Max, Humid_Min,Wind_Max, Wind_Min, Pressure_Max, Pressure_Min = 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
    for row2 in pd.date_range(end=row1, periods= days_required):
        try:
            temp = climate[climate['Date Format'] == row2]
            
            Temp_Max += float(temp['Temp_Max'])
            Temp_Avg += float(temp['Temp_Avg'])
            Temp_Min += float(temp['Temp_Min'])
        
            Dew_Max += float(temp['Dew_Max'])
            Dew_Avg += float(temp['Dew_Avg'])
            Dew_Min += float(temp['Dew_Min'])
        
            Humid_Max += float(temp['Humid_Max'])
            Humid_Avg += float(temp['Humid_Avg'])
            Humid_Min += float(temp['Humid_Min'])
        
            Wind_Max += float(temp['Wind_Max'])
            Wind_Avg += float(temp['Wind_Avg'])
            Wind_Min += float(temp['Wind_Min'])
        
            Pressure_Avg += float(temp['Pressure_Avg'])
        
            Precipitation_Total += float(temp['Precipitation_Total'])
        except:
            continue
        
    final.append([row1,Temp_Max/days_required,Temp_Avg / days_required, Temp_Min / days_required,Dew_Min/ days_required,Dew_Avg/ days_required, Dew_Min/ days_required, Humid_Max/ days_required,Humid_Avg/ days_required, Humid_Min/ days_required, Wind_Max/ days_required, Wind_Avg/ days_required, Wind_Min/ days_required, Pressure_Max/ days_required, Pressure_Avg/ days_required, Pressure_Min/ days_required, Precipitation_Total/ days_required])

In [10]:
final

[[Timestamp('2019-01-02 00:00:00'),
  86.17619047619047,
  78.7038095238094,
  69.84285714285714,
  63.49523809523809,
  66.67428571428562,
  63.49523809523809,
  89.22857142857143,
  70.45904761904755,
  52.628571428571426,
  7.109523809523809,
  4.4385714285714215,
  2.2,
  0.0,
  0.0,
  0.0,
  0.0],
 [Timestamp('2019-01-03 00:00:00'),
  85.99047619047619,
  78.51238095238082,
  69.64285714285714,
  63.37619047619047,
  66.54809523809513,
  63.37619047619047,
  89.31904761904762,
  70.58285714285708,
  52.766666666666666,
  7.109523809523809,
  4.429047619047612,
  2.1857142857142855,
  0.0,
  0.0,
  0.0,
  0.0],
 [Timestamp('2019-01-04 00:00:00'),
  85.82857142857142,
  78.32666666666654,
  69.43333333333334,
  63.22380952380952,
  66.39190476190467,
  63.22380952380952,
  89.41904761904762,
  70.65095238095232,
  52.76190476190476,
  7.085714285714285,
  4.413333333333327,
  2.1904761904761907,
  0.0,
  0.0,
  0.0,
  0.0],
 [Timestamp('2019-01-05 00:00:00'),
  85.66190476190476,
  

In [11]:
len(final)

287

In [None]:
final

In [13]:
df = pd.DataFrame(data = final, columns = ['Date','Temp_Max', 'Temp_Avg', 'Temp_Min', 'Dew_Max', 'Dew_Avg', 'Dew_Min', 'Humid_Max', 'Humid_Avg','Humid_Min','Wind_Max','Wind_Avg','Wind_Min','Pressure_Max', 'Pressure_Avg','Pressure_Min', 'Precipitation_Total'])

# CSV File is generated here 

In [14]:
df.to_csv('climate_Agra_210.csv') 

In [None]:
#for row2 in pd.date_range(end=row1, periods= days_required):
        #temp = climate[climate['Date Format'] == row2]
        #print(row2)
        '''Temp_Avg += int(temp['Temp_Avg'])
        Dew_Avg += int(temp['Dew_Avg'])
        Humid_Avg += int(temp['Humid_Avg'])
        Wind_Avg += int(temp['Wind_Avg']) 
        Pressure_Avg += int(temp['Pressure_Avg'])
        Precipitation_Total += int(temp['Precipitation_Total'])
    final.append([row1,Temp_Avg / month_required, Dew_Avg/month_required, Humid_Avg/month_required, Wind_Avg/month_required, Pressure_Avg/month_required,Precipitation_Total/month_required])'''
'''for row1 in vegetables['Price Date']:
    Temp_Avg, Dew_Avg,Humid_Avg,Wind_Avg,Pressure_Avg, Precipitation_Total = 0,0,0,0,0,0
    temp = climate[climate['Date Format'] == row1]
    print(temp)'''
'''
Temp_Max += temp['Temp_Max'].astype(float)
        Temp_Avg += temp['Temp_Avg'].astype(float)
        Temp_Min += temp['Temp_Min'].astype(float)
        
        Dew_Max += temp['Dew_Max'].astype(float)
        Dew_Avg += temp['Dew_Avg'].astype(float)
        Dew_Min += temp['Dew_Min'].astype(float)
        
        Humid_Max += temp['Humid_Max'].astype(float)
        Humid_Avg += temp['Humid_Avg'].astype(float)
        Humid_Min += temp['Humid_Min'].astype(float)
        
        Wind_Max += temp['Wind_Max'].astype(float)
        Wind_Avg += temp['Wind_Avg'].astype(float)
        Wind_Min += temp['Wind_Min'].astype(float)
        
        Pressure_Avg += temp['Pressure_Avg'].astype(float)
        
        Precipitation_Total += temp['Precipitation_Total'].astype(float)
    final.append([row1,Temp_Max/days_required,Temp_Avg / days_required, Temp_Min / days_required,Dew_Min/ days_required,Dew_Avg/ days_required, Dew_Min/ days_required, Humid_Max/ days_required,Humid_Avg/ days_required, Humid_Min/ days_required, Wind_Max/ days_required, Wind_Avg/ days_required, Wind_Min/ days_required, Pressure_Max/ days_required, Pressure_Avg/ days_required, Pressure_Min/ days_required, Precipitation_Total/ days_required])
    
Temp_Max += int(temp['Temp_Max'])
        Temp_Avg += int(temp['Temp_Avg'])
        Temp_Min += int(temp['Temp_Min'])
        
        Dew_Max += int(temp['Dew_Max'])
        Dew_Avg += int(temp['Dew_Avg'])
        Dew_Min += int(temp['Dew_Min'])
        
        Humid_Max += int(temp['Humid_Max'])
        Humid_Avg += int(temp['Humid_Avg'])
        Humid_Min += int(temp['Humid_Min'])
        
        Wind_Max += int(temp['Wind_Max'])
        Wind_Avg += int(temp['Wind_Avg'])
        Wind_Min += int(temp['Wind_Min'])
        
        Pressure_Avg += int(temp['Pressure_Avg'])
        
        Precipitation_Total += int(temp['Precipitation_Total'])
'''