In [57]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

In [58]:
# Golf|Weather Data Set from https://gerardnico.com/data_mining/weather
# Each record holds the outlook, the temperature (numeric and nominal),
# the humidity (numeric and nominal), a windy flag and the play/no-play label.

golf_data_header = [
    'Outlook',
    'Temperature_Numeric',
    'Temperature_Nominal',
    'Humidity_Numeric',
    'Humidity_Nominal',
    'Windy',
    'Play',
]

golf_data_set = [
    ['overcast', 83, 'hot',  86, 'high',   False, True],
    ['overcast', 64, 'cool', 65, 'normal', True,  True],
    ['overcast', 72, 'mild', 90, 'high',   True,  True],
    ['overcast', 81, 'hot',  75, 'normal', False, True],
    ['rainy',    70, 'mild', 96, 'high',   False, True],
    ['rainy',    68, 'cool', 80, 'normal', False, True],
    ['rainy',    65, 'cool', 70, 'normal', True,  False],
    ['rainy',    75, 'mild', 80, 'normal', False, True],
    ['rainy',    71, 'mild', 91, 'high',   True,  False],
    ['sunny',    85, 'hot',  85, 'high',   False, False],
    ['sunny',    80, 'hot',  90, 'high',   True,  False],
    ['sunny',    72, 'mild', 95, 'high',   False, False],
    ['sunny',    69, 'cool', 70, 'normal', False, True],
    ['sunny',    75, 'mild', 70, 'normal', True,  True],
]

# Build the DataFrame from the list of rows and, in the same step, turn the
# two boolean columns into 0/1 integers.
golf_df = pd.DataFrame(golf_data_set, columns=golf_data_header).astype(
    {'Windy': int, 'Play': int}
)

In [59]:
golf_df

Unnamed: 0,Outlook,Temperature_Numeric,Temperature_Nominal,Humidity_Numeric,Humidity_Nominal,Windy,Play
0,overcast,83,hot,86,high,0,1
1,overcast,64,cool,65,normal,1,1
2,overcast,72,mild,90,high,1,1
3,overcast,81,hot,75,normal,0,1
4,rainy,70,mild,96,high,0,1
5,rainy,68,cool,80,normal,0,1
6,rainy,65,cool,70,normal,1,0
7,rainy,75,mild,80,normal,0,1
8,rainy,71,mild,91,high,1,0
9,sunny,85,hot,85,high,0,0


In [60]:
golf_df['Outlook'].unique()

array(['overcast', 'rainy', 'sunny'], dtype=object)

In [61]:
golf_df['Temperature_Nominal'].unique()

array(['hot', 'cool', 'mild'], dtype=object)

In [62]:
golf_df['Humidity_Nominal'].unique()

array(['high', 'normal'], dtype=object)

In [63]:
golf_df['Play'].value_counts()

1    9
0    5
Name: Play, dtype: int64

In [64]:
golf_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14 entries, 0 to 13
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Outlook              14 non-null     object
 1   Temperature_Numeric  14 non-null     int64 
 2   Temperature_Nominal  14 non-null     object
 3   Humidity_Numeric     14 non-null     int64 
 4   Humidity_Nominal     14 non-null     object
 5   Windy                14 non-null     int64 
 6   Play                 14 non-null     int64 
dtypes: int64(4), object(3)
memory usage: 912.0+ bytes


In [65]:
golf_df.describe()

Unnamed: 0,Temperature_Numeric,Humidity_Numeric,Windy,Play
count,14.0,14.0,14.0,14.0
mean,73.571429,81.642857,0.428571,0.642857
std,6.571667,10.285218,0.513553,0.497245
min,64.0,65.0,0.0,0.0
25%,69.25,71.25,0.0,0.0
50%,72.0,82.5,0.0,1.0
75%,78.75,90.0,1.0,1.0
max,85.0,96.0,1.0,1.0


In [66]:
def prepare_data_for_model(raw_dataframe, target_columns, drop_first = False, make_na_col = False):
    """One-hot encode selected categorical columns of a DataFrame.

    Parameters
    ----------
    raw_dataframe : pandas.DataFrame
        Input frame; it is passed to ``pd.get_dummies`` and not modified here.
    target_columns : list of str
        Names of the columns to replace with indicator (dummy) columns.
    drop_first : bool, default False
        Forwarded to ``pd.get_dummies(drop_first=...)``.
    make_na_col : bool, default False
        Forwarded to ``pd.get_dummies(dummy_na=...)``.

    Returns
    -------
    pandas.DataFrame
        The result of ``pd.get_dummies`` for the given arguments.
    """
    encoding_options = {
        'columns': target_columns,
        'drop_first': drop_first,
        'dummy_na': make_na_col,
    }
    return pd.get_dummies(raw_dataframe, **encoding_options)

# One-hot encode the three nominal weather columns, then drop any row that
# contains a missing value.
golf_df_ready = prepare_data_for_model(
    golf_df,
    target_columns=['Outlook', 'Temperature_Nominal', 'Humidity_Nominal'],
).dropna()
golf_df_ready.head()

Unnamed: 0,Temperature_Numeric,Humidity_Numeric,Windy,Play,Outlook_overcast,Outlook_rainy,Outlook_sunny,Temperature_Nominal_cool,Temperature_Nominal_hot,Temperature_Nominal_mild,Humidity_Nominal_high,Humidity_Nominal_normal
0,83,86,0,1,1,0,0,0,1,0,1,0
1,64,65,1,1,1,0,0,1,0,0,0,1
2,72,90,1,1,1,0,0,0,0,1,1,0
3,81,75,0,1,1,0,0,0,1,0,0,1
4,70,96,0,1,0,1,0,0,0,1,1,0


In [67]:
golf_df_ready

Unnamed: 0,Temperature_Numeric,Humidity_Numeric,Windy,Play,Outlook_overcast,Outlook_rainy,Outlook_sunny,Temperature_Nominal_cool,Temperature_Nominal_hot,Temperature_Nominal_mild,Humidity_Nominal_high,Humidity_Nominal_normal
0,83,86,0,1,1,0,0,0,1,0,1,0
1,64,65,1,1,1,0,0,1,0,0,0,1
2,72,90,1,1,1,0,0,0,0,1,1,0
3,81,75,0,1,1,0,0,0,1,0,0,1
4,70,96,0,1,0,1,0,0,0,1,1,0
5,68,80,0,1,0,1,0,1,0,0,0,1
6,65,70,1,0,0,1,0,1,0,0,0,1
7,75,80,0,1,0,1,0,0,0,1,0,1
8,71,91,1,0,0,1,0,0,0,1,1,0
9,85,85,0,0,0,0,1,0,1,0,1,0


In [68]:
# Target vector: the 0/1 'Play' label.
y = golf_df_ready['Play']
# Feature matrix: every remaining column.
X = golf_df_ready.drop(columns='Play')

In [69]:
X

Unnamed: 0,Temperature_Numeric,Humidity_Numeric,Windy,Outlook_overcast,Outlook_rainy,Outlook_sunny,Temperature_Nominal_cool,Temperature_Nominal_hot,Temperature_Nominal_mild,Humidity_Nominal_high,Humidity_Nominal_normal
0,83,86,0,1,0,0,0,1,0,1,0
1,64,65,1,1,0,0,1,0,0,0,1
2,72,90,1,1,0,0,0,0,1,1,0
3,81,75,0,1,0,0,0,1,0,0,1
4,70,96,0,0,1,0,0,0,1,1,0
5,68,80,0,0,1,0,1,0,0,0,1
6,65,70,1,0,1,0,1,0,0,0,1
7,75,80,0,0,1,0,0,0,1,0,1
8,71,91,1,0,1,0,0,0,1,1,0
9,85,85,0,0,0,1,0,1,0,1,0


In [70]:
y

0     1
1     1
2     1
3     1
4     1
5     1
6     0
7     1
8     0
9     0
10    0
11    0
12    1
13    1
Name: Play, dtype: int64

In [71]:
# Shuffle the rows and keep 70% of them for training; the rest is held out
# for testing. random_state is fixed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    shuffle=True,
    random_state=1,
)

In [72]:
X_train

Unnamed: 0,Temperature_Numeric,Humidity_Numeric,Windy,Outlook_overcast,Outlook_rainy,Outlook_sunny,Temperature_Nominal_cool,Temperature_Nominal_hot,Temperature_Nominal_mild,Humidity_Nominal_high,Humidity_Nominal_normal
4,70,96,0,0,1,0,0,0,1,1,0
1,64,65,1,1,0,0,1,0,0,0,1
12,69,70,0,0,0,1,1,0,0,0,1
0,83,86,0,1,0,0,0,1,0,1,0
13,75,70,1,0,0,1,0,0,1,0,1
9,85,85,0,0,0,1,0,1,0,1,0
8,71,91,1,0,1,0,0,0,1,1,0
11,72,95,0,0,0,1,0,0,1,1,0
5,68,80,0,0,1,0,1,0,0,0,1


In [73]:
X_test

Unnamed: 0,Temperature_Numeric,Humidity_Numeric,Windy,Outlook_overcast,Outlook_rainy,Outlook_sunny,Temperature_Nominal_cool,Temperature_Nominal_hot,Temperature_Nominal_mild,Humidity_Nominal_high,Humidity_Nominal_normal
3,81,75,0,1,0,0,0,1,0,0,1
7,75,80,0,0,1,0,0,0,1,0,1
6,65,70,1,0,1,0,1,0,0,0,1
2,72,90,1,1,0,0,0,0,1,1,0
10,80,90,1,0,0,1,0,1,0,1,0


In [74]:
from sklearn.preprocessing import StandardScaler

In [75]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits.
scaler = StandardScaler().fit(X_train)

# Wrap the transformed values back into DataFrames so each split keeps its
# original row index and column names. X_train / X_test are rebound to the
# scaled versions, exactly as before.
X_train, X_test = (
    pd.DataFrame(scaler.transform(split), index=split.index, columns=split.columns)
    for split in (X_train, X_test)
)

In [76]:
X_train

Unnamed: 0,Temperature_Numeric,Humidity_Numeric,Windy,Outlook_overcast,Outlook_rainy,Outlook_sunny,Temperature_Nominal_cool,Temperature_Nominal_hot,Temperature_Nominal_mild,Humidity_Nominal_high,Humidity_Nominal_normal
4,-0.459279,1.294915,-0.707107,-0.534522,1.414214,-0.894427,-0.707107,-0.534522,1.118034,0.894427,-0.894427
1,-1.377838,-1.572397,1.414214,1.870829,-0.707107,-0.894427,1.414214,-0.534522,-0.894427,-1.118034,1.118034
12,-0.612372,-1.109928,-0.707107,-0.534522,-0.707107,1.118034,1.414214,-0.534522,-0.894427,-1.118034,1.118034
0,1.530931,0.369976,-0.707107,1.870829,-0.707107,-0.894427,-0.707107,1.870829,-0.894427,0.894427,-0.894427
13,0.306186,-1.109928,1.414214,-0.534522,-0.707107,1.118034,-0.707107,-0.534522,1.118034,-1.118034,1.118034
9,1.837117,0.277482,-0.707107,-0.534522,-0.707107,1.118034,-0.707107,1.870829,-0.894427,0.894427,-0.894427
8,-0.306186,0.832446,1.414214,-0.534522,1.414214,-0.894427,-0.707107,-0.534522,1.118034,0.894427,-0.894427
11,-0.153093,1.202422,-0.707107,-0.534522,-0.707107,1.118034,-0.707107,-0.534522,1.118034,0.894427,-0.894427
5,-0.765466,-0.184988,-0.707107,-0.534522,1.414214,-0.894427,1.414214,-0.534522,-0.894427,-1.118034,1.118034


In [77]:
X_test

Unnamed: 0,Temperature_Numeric,Humidity_Numeric,Windy,Outlook_overcast,Outlook_rainy,Outlook_sunny,Temperature_Nominal_cool,Temperature_Nominal_hot,Temperature_Nominal_mild,Humidity_Nominal_high,Humidity_Nominal_normal
3,1.224745,-0.647458,-0.707107,1.870829,-0.707107,-0.894427,-0.707107,1.870829,-0.894427,-1.118034,1.118034
7,0.306186,-0.184988,-0.707107,-0.534522,1.414214,-0.894427,-0.707107,-0.534522,1.118034,-1.118034,1.118034
6,-1.224745,-1.109928,1.414214,-0.534522,1.414214,-0.894427,1.414214,-0.534522,-0.894427,-1.118034,1.118034
2,-0.153093,0.739952,1.414214,1.870829,-0.707107,-0.894427,-0.707107,-0.534522,1.118034,0.894427,-0.894427
10,1.071652,0.739952,1.414214,-0.534522,-0.707107,1.118034,-0.707107,1.870829,-0.894427,0.894427,-0.894427


In [78]:
# Gaussian Naive Bayes classifier trained on the (scaled) training split.
model = GaussianNB().fit(X_train, y_train)

In [79]:
model.score(X_test, y_test)

0.8

In [80]:
# Predictions
print(model.predict(X_test))

[1 1 1 1 0]


In [81]:
# how about predicting on new data?
# One observation, with values listed in the same order as X_train.columns.
# These are raw, unscaled values; `model` was fit on features transformed by
# `scaler`, so this row needs the same `scaler.transform` before prediction.
future_feature_values = [85, 40, 0, 0, 0, 1, 0, 0, 1, 0, 1]
future_golf_data = pd.DataFrame([future_feature_values], columns=X_train.columns)
future_golf_data.head()

Unnamed: 0,Temperature_Numeric,Humidity_Numeric,Windy,Outlook_overcast,Outlook_rainy,Outlook_sunny,Temperature_Nominal_cool,Temperature_Nominal_hot,Temperature_Nominal_mild,Humidity_Nominal_high,Humidity_Nominal_normal
0,85,40,0,0,0,1,0,0,1,0,1


In [82]:
# `model` was fit on features standardized by `scaler`, so a new observation
# must go through the same fitted scaler before predict(). Feeding the raw
# values (e.g. temperature 85, humidity 40) would put them many standard
# deviations away from anything the model saw during training.
future_golf_scaled = pd.DataFrame(
    scaler.transform(future_golf_data),
    index=future_golf_data.index,
    columns=future_golf_data.columns,
)
print("Will I Golf Tomorrow? %s" % bool(model.predict(future_golf_scaled.head(1))[0]))

Will I Golf Tomorrow? True


# OpenWeatherMap API

In [84]:
# bring in real weather data using openweathermap and json.load
import json
import os
from urllib.parse import urlencode
from urllib.request import urlopen

# Credentials must not live in the notebook: read the key from the environment.
# NOTE(review): the key that used to be hardcoded in this cell has been
# published with the notebook and should be treated as compromised and rotated.
YOUR_OPENWEATHERMAP_API_KEY = os.environ.get('OPENWEATHERMAP_API_KEY')
if not YOUR_OPENWEATHERMAP_API_KEY:
    raise RuntimeError(
        "Set the OPENWEATHERMAP_API_KEY environment variable to your OpenWeatherMap API key."
    )

# Build the query string with urlencode so the parameters are escaped, and
# call the API over HTTPS so the key is not sent in clear text.
weather_query = urlencode({'q': 'Kolkata', 'appid': YOUR_OPENWEATHERMAP_API_KEY})
weather_url = "https://api.openweathermap.org/data/2.5/weather?" + weather_query

# The context manager closes the HTTP response; the timeout stops the cell
# from hanging forever if the service does not answer.
with urlopen(weather_url, timeout=10) as weather_response:
    weather_json = json.load(weather_response)
weather_json

{'base': 'stations',
 'clouds': {'all': 20},
 'cod': 200,
 'coord': {'lat': 22.5697, 'lon': 88.3697},
 'dt': 1615301574,
 'id': 1275004,
 'main': {'feels_like': 303.5,
  'humidity': 78,
  'pressure': 1011,
  'temp': 300.15,
  'temp_max': 300.15,
  'temp_min': 300.15},
 'name': 'Kolkata',
 'sys': {'country': 'IN',
  'id': 9114,
  'sunrise': 1615249268,
  'sunset': 1615291998,
  'type': 1},
 'timezone': 19800,
 'visibility': 3200,
 'weather': [{'description': 'haze',
   'icon': '50n',
   'id': 721,
   'main': 'Haze'}],
 'wind': {'deg': 160, 'speed': 2.57}}

In [85]:
# access individual sections
weather_json['main']

{'feels_like': 303.5,
 'humidity': 78,
 'pressure': 1011,
 'temp': 300.15,
 'temp_max': 300.15,
 'temp_min': 300.15}

## Translating Data From OpenWeatherMap to Golf|Weather Data