In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Import Data

In [26]:
# Load the South-region pumpkin data.
# Raw string: '\S' is not a valid escape sequence, so a plain literal triggers
# an invalid-escape warning on newer Python versions; the path itself is unchanged.
SRPumpkins = pd.read_csv(r'C:\South_RegionPumpkins.csv')

In [27]:
SRPumpkins.head()

Unnamed: 0.1,Unnamed: 0,Commodity Name,City Name,Package,Variety,Date,Low Price,High Price,Origin,Item Size,Color
0,1,PUMPKINS,DALLAS,24 inch bins,,09/16/2017,160.0,225.0,TEXAS,lge,
1,2,PUMPKINS,DALLAS,24 inch bins,,09/23/2017,160.0,225.0,TEXAS,lge,
2,3,PUMPKINS,DALLAS,24 inch bins,,09/30/2017,160.0,225.0,TEXAS,lge,
3,4,PUMPKINS,DALLAS,24 inch bins,HOWDEN TYPE,09/24/2016,160.0,160.0,TEXAS,jbo,
4,5,PUMPKINS,DALLAS,24 inch bins,HOWDEN TYPE,09/24/2016,150.0,150.0,TEXAS,med,


In [28]:
# Load the Midwest-region pumpkin data.
# Raw string: '\M' is not a valid escape sequence, so a plain literal triggers
# an invalid-escape warning on newer Python versions; the path itself is unchanged.
MDWPumpkins = pd.read_csv(r'C:\Midwest_RegionPumpkins.csv')

In [29]:
MDWPumpkins.head()

Unnamed: 0.1,Unnamed: 0,Commodity Name,City Name,Package,Variety,Date,Low Price,High Price,Origin,Item Size,Color
0,1,PUMPKINS,DETROIT,24 inch bins,HOWDEN TYPE,09/16/2017,95.0,95.0,MICHIGAN,lge,
1,2,PUMPKINS,DETROIT,24 inch bins,HOWDEN TYPE,09/16/2017,94.0,95.0,MICHIGAN,med,
2,3,PUMPKINS,DETROIT,24 inch bins,HOWDEN TYPE,09/23/2017,95.0,95.0,MICHIGAN,lge,
3,4,PUMPKINS,DETROIT,24 inch bins,HOWDEN TYPE,09/23/2017,94.0,95.0,MICHIGAN,med,
4,5,PUMPKINS,DETROIT,bins,HOWDEN TYPE,09/23/2017,175.0,175.0,MICHIGAN,xlge,


# Data Wrangling

In [30]:
def recode(series):
    """Translate a single 'Item Size' label into its integer code.

    Despite the parameter name, the argument is compared directly against
    string labels, so it is expected to be one cell value (the function is
    used element-wise on the 'Item Size' column).

    Parameters
    ----------
    series : object
        One size label such as "sml" or "jbo".

    Returns
    -------
    int or None
        The code for a recognised label; None when nothing matches
        (for example a missing value or an unexpected spelling).
    """
    # Label -> code pairs, checked in the listed order.
    size_codes = (
        ("sml", 1),
        ("med", 2),
        ("lge", 3),
        ("xlge", 4),
        ("jbo", 5),
        ("med-lge", 6),
        ("exjbo", 7),
    )
    for label, code in size_codes:
        if series == label:
            return code
    # No label matched: fall through with no code.
    return None

In [31]:
# Encode each 'Item Size' label as an integer code, one element at a time.
# Uses the item-size `recode`; a later cell rebinds `recode` for varieties,
# so this cell must run before that redefinition.
SRPumpkins['ItemSizeR'] = SRPumpkins['Item Size'].map(recode)

In [32]:
SRPumpkins.head()

Unnamed: 0.1,Unnamed: 0,Commodity Name,City Name,Package,Variety,Date,Low Price,High Price,Origin,Item Size,Color,ItemSizeR
0,1,PUMPKINS,DALLAS,24 inch bins,,09/16/2017,160.0,225.0,TEXAS,lge,,3.0
1,2,PUMPKINS,DALLAS,24 inch bins,,09/23/2017,160.0,225.0,TEXAS,lge,,3.0
2,3,PUMPKINS,DALLAS,24 inch bins,,09/30/2017,160.0,225.0,TEXAS,lge,,3.0
3,4,PUMPKINS,DALLAS,24 inch bins,HOWDEN TYPE,09/24/2016,160.0,160.0,TEXAS,jbo,,5.0
4,5,PUMPKINS,DALLAS,24 inch bins,HOWDEN TYPE,09/24/2016,150.0,150.0,TEXAS,med,,2.0


In [33]:
# Encode each 'Item Size' label as an integer code, one element at a time.
# Uses the item-size `recode`; a later cell rebinds `recode` for varieties,
# so this cell must run before that redefinition.
MDWPumpkins['ItemSizeR'] = MDWPumpkins['Item Size'].map(recode)

In [34]:
MDWPumpkins.head()

Unnamed: 0.1,Unnamed: 0,Commodity Name,City Name,Package,Variety,Date,Low Price,High Price,Origin,Item Size,Color,ItemSizeR
0,1,PUMPKINS,DETROIT,24 inch bins,HOWDEN TYPE,09/16/2017,95.0,95.0,MICHIGAN,lge,,3.0
1,2,PUMPKINS,DETROIT,24 inch bins,HOWDEN TYPE,09/16/2017,94.0,95.0,MICHIGAN,med,,2.0
2,3,PUMPKINS,DETROIT,24 inch bins,HOWDEN TYPE,09/23/2017,95.0,95.0,MICHIGAN,lge,,3.0
3,4,PUMPKINS,DETROIT,24 inch bins,HOWDEN TYPE,09/23/2017,94.0,95.0,MICHIGAN,med,,2.0
4,5,PUMPKINS,DETROIT,bins,HOWDEN TYPE,09/23/2017,175.0,175.0,MICHIGAN,xlge,,4.0


In [35]:
def recode(series):
    """Translate a single 'Variety' label into its integer code.

    Note: this rebinds the name `recode`, which an earlier cell defines for
    item sizes; after this cell runs, `recode` refers to the variety mapping.

    Parameters
    ----------
    series : object
        One variety label such as "HOWDEN TYPE" (a single cell value, not a
        whole column).

    Returns
    -------
    int or None
        The code for a recognised variety; None when nothing matches
        (for example a missing value). The codes are not contiguous.
    """
    # Variety -> code, checked in insertion order.
    variety_codes = {
        "HOWDEN TYPE": 1,
        "CINDERELLA": 2,
        "FAIRYTALE": 3,
        "PIE TYPE": 4,
        "MIXED HEIRLOOM VARIETIES": 7,
        "MINIATURE": 11,
        "HOWDEN WHITE TYPE": 12,
    }
    for variety, code in variety_codes.items():
        if series == variety:
            return code
    # No variety matched: fall through with no code.
    return None

In [36]:
# Encode each 'Variety' label as an integer code, one element at a time.
# Requires the variety version of `recode` (the most recent definition).
SRPumpkins['VarietyR'] = SRPumpkins['Variety'].map(recode)

In [37]:
# Remove the original text columns in place, keeping their numeric recodes
# (ItemSizeR, VarietyR). Re-running this cell raises KeyError because the
# columns are already gone.
SRPumpkins.drop(columns=['Variety', 'Item Size'], inplace=True)

In [38]:
SRPumpkins.head()

Unnamed: 0.1,Unnamed: 0,Commodity Name,City Name,Package,Date,Low Price,High Price,Origin,Color,ItemSizeR,VarietyR
0,1,PUMPKINS,DALLAS,24 inch bins,09/16/2017,160.0,225.0,TEXAS,,3.0,
1,2,PUMPKINS,DALLAS,24 inch bins,09/23/2017,160.0,225.0,TEXAS,,3.0,
2,3,PUMPKINS,DALLAS,24 inch bins,09/30/2017,160.0,225.0,TEXAS,,3.0,
3,4,PUMPKINS,DALLAS,24 inch bins,09/24/2016,160.0,160.0,TEXAS,,5.0,1.0
4,5,PUMPKINS,DALLAS,24 inch bins,09/24/2016,150.0,150.0,TEXAS,,2.0,1.0


In [39]:
# Encode each 'Variety' label as an integer code, one element at a time.
# Requires the variety version of `recode` (the most recent definition).
MDWPumpkins['VarietyR'] = MDWPumpkins['Variety'].map(recode)

In [40]:
# Remove the original text columns in place, keeping their numeric recodes
# (ItemSizeR, VarietyR). Re-running this cell raises KeyError because the
# columns are already gone.
MDWPumpkins.drop(columns=['Variety', 'Item Size'], inplace=True)

In [41]:
MDWPumpkins.head()

Unnamed: 0.1,Unnamed: 0,Commodity Name,City Name,Package,Date,Low Price,High Price,Origin,Color,ItemSizeR,VarietyR
0,1,PUMPKINS,DETROIT,24 inch bins,09/16/2017,95.0,95.0,MICHIGAN,,3.0,1
1,2,PUMPKINS,DETROIT,24 inch bins,09/16/2017,94.0,95.0,MICHIGAN,,2.0,1
2,3,PUMPKINS,DETROIT,24 inch bins,09/23/2017,95.0,95.0,MICHIGAN,,3.0,1
3,4,PUMPKINS,DETROIT,24 inch bins,09/23/2017,94.0,95.0,MICHIGAN,,2.0,1
4,5,PUMPKINS,DETROIT,bins,09/23/2017,175.0,175.0,MICHIGAN,,4.0,1


# Data Export

In [42]:
# Export the recoded South-region data.
# index=False: do not write the row index as an extra unnamed column. The
# input files already contain 'Unnamed: 0' and 'Unnamed: 0.1' columns,
# presumably left over from earlier exports that wrote the index.
SRPumpkins.to_csv(r'C:\SRPumpkins.csv', index=False)

In [43]:
# Export the recoded Midwest-region data.
# index=False: do not write the row index as an extra unnamed column. The
# input files already contain 'Unnamed: 0' and 'Unnamed: 0.1' columns,
# presumably left over from earlier exports that wrote the index.
MDWPumpkins.to_csv(r'C:\MDWPumpkins.csv', index=False)