# Akeed Restaurant Recommendation Challenge

## Importing libraries:

In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import psycopg2
import geopandas as gpd
from shapely import wkt
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import StandardScaler

## Inspecting the base layers:

### Base Layer 1

In [None]:
#Training Locations
# Read the customer-location table from disk, report its size, and preview it.
training_locations = pd.read_csv("./train_locations.csv")
print(f"\n This table has {training_locations.shape[0]} rows.")
training_locations.head()
# Optional check of missing values per column:
#training_locations.isnull().sum()

### Base Layer 2

In [None]:
#Training Customers
# Read the customer table from disk, report its size, and preview it.
training_customers = pd.read_csv("./train_customers.csv")
print(f"\n This table has {training_customers.shape[0]} rows.")
training_customers.head()
# Optional check of missing values per column:
#training_customers.isnull().sum()

### Base Layer 3

In [None]:
#Orders
# Read the orders table from disk, report its size, and preview it.
orders = pd.read_csv("./orders.csv")
print(f"\n This table has {orders.shape[0]} rows.")
orders.head()
# Optional check of missing values per column:
#orders.isnull().sum()

### Base Layer 4

In [None]:
#Vendors
# Read the vendors table from disk, report its size, and preview it.
vendors = pd.read_csv("./vendors.csv")
print(f"\n This table has {vendors.shape[0]} rows.")
vendors.head()
# Optional check of missing values per column:
#vendors.isnull().sum()

## Pre-process tables where needed

### Deal with null values

In [None]:
#Training Locations
# Replace missing values with the "unknown" placeholder in non-numeric columns
# only. Writing a string into a numeric column (e.g. latitude/longitude) would
# turn it into a mixed-type object column and break any later numeric or
# geometric use; numeric gaps are therefore left as NaN.
training_locations = training_locations.fillna(
    value={
        column: "unknown"
        for column in training_locations.select_dtypes(exclude="number").columns
    }
)
training_locations.head()

In [None]:
#Training Customers
# Replace missing values with the "unknown" placeholder in non-numeric columns
# only. Writing a string into a numeric column would turn it into a mixed-type
# object column; numeric gaps are therefore left as NaN.
training_customers = training_customers.fillna(
    value={
        column: "unknown"
        for column in training_customers.select_dtypes(exclude="number").columns
    }
)
training_customers.head()

In [None]:
#Orders
# Replace missing values with the "unknown" placeholder in non-numeric columns
# only. Writing a string into a numeric column would turn it into a mixed-type
# object column; numeric gaps are therefore left as NaN.
orders = orders.fillna(
    value={
        column: "unknown"
        for column in orders.select_dtypes(exclude="number").columns
    }
)
orders.head()

In [None]:
#Vendors
# Replace missing values with the "unknown" placeholder in non-numeric columns
# only. Writing a string into a numeric column would turn it into a mixed-type
# object column; numeric gaps are therefore left as NaN.
vendors = vendors.fillna(
    value={
        column: "unknown"
        for column in vendors.select_dtypes(exclude="number").columns
    }
)
vendors.head()

### Deal with mixed data-type fields

In [None]:
#find mixed data type columns
# Print the name of every column in `orders` that contains at least one cell
# whose Python type differs from the type of the column's first cell.
for col in orders.columns:
    # Series.map is used instead of DataFrame.applymap, which newer pandas
    # releases deprecate; the per-cell result is the same.
    cell_types = orders[col].map(type)
    weird = cell_types != cell_types.iloc[0]
    if weird.any():
        print(col)

In [None]:
#find mixed data type cells
# Flag every row of `orders` in which at least one cell's type differs from
# the type found for the same column in the first row.
# NOTE(review): the reference types come from `orders.iloc[0]`, a single row
# spanning all columns; presumably its values can be NumPy scalars while
# applymap(type) reports built-in types, which would flag every row — confirm.
weird = (orders.applymap(type) != orders.iloc[0].apply(type)).any(axis=1)
# Within the flagged rows, count per column how many cells are equal to 1.
orders[weird].isin([1]).sum()
# Reminder of the remaining work; this bare string does not touch the data.
'''Find and replace cell types'''

### Encode nominal fields

In [None]:
#Location Type column in Training Locations
# Quick look at one representative row per location type (result not kept).
training_locations.groupby('location_type').first()
# One indicator column per distinct location type, attached to the original
# table by matching row index.
dummy = pd.get_dummies(training_locations['location_type'])
encoded_training_locations = pd.merge(
    training_locations,
    dummy,
    left_index=True,
    right_index=True,
)
encoded_training_locations.head()

In [None]:
#Gender column in Training Customers
# Normalise the labels before one-hot encoding: surrounding whitespace and
# inconsistent casing would otherwise each yield a separate dummy column.
training_customers['gender'] = training_customers['gender'].str.strip().str.upper()
# A whitespace-only entry is empty after stripping; fold it into the same
# "UNKNOWN" bucket that an upper-cased "unknown" placeholder ends up in.
training_customers['gender'] = training_customers['gender'].replace('', 'UNKNOWN')
# One indicator column per distinct gender label, attached by row index.
dummy = pd.get_dummies(training_customers['gender'])
encoded_training_customers = training_customers.merge(dummy, left_index=True, right_index=True)
# Show the resulting columns so the set of gender categories can be checked.
encoded_training_customers.columns

### Add valuable calculated fields

In [None]:
#Add geometry field to the training locations table
# Build one point geometry per row from its longitude/latitude pair (WKT puts
# x = longitude first). The values are assigned positionally, so the result
# does not depend on the frame having a 0..n-1 index, and no per-row `.loc`
# writes are needed.
encoded_training_locations['geom'] = [
    wkt.loads(f'POINT({lon} {lat})')
    for lon, lat in zip(
        encoded_training_locations['longitude'],
        encoded_training_locations['latitude'],
    )
]
# Wrap the table as a GeoDataFrame using the new column as its geometry,
# declared in the EPSG:4326 coordinate reference system.
encoded_training_locations_gdf = gpd.GeoDataFrame(encoded_training_locations, geometry='geom', crs='epsg:4326')

In [None]:
#Add geometry field to the vendors table


### Feature-scale field values if needed

In [None]:
#sc = StandardScaler()
#X = sc.fit_transform(X)

## Exploring Relationships using plots

### Exploration 1

In [None]:
#Spatial Relations

# Draw the training locations with grey outlines and no fill on a large figure.
ax = encoded_training_locations_gdf.plot(
    figsize=(18, 12),
    facecolor='none',
    edgecolor='grey',
    legend=True,
)
# Optional overlays / zoom, currently disabled:
#roads.plot(ax=ax, color = 'purple')
#ax.set(xlim=(15, 35), ylim=(-37.5, -20))
# Hide the axis frame and ticks, then add a bold title and tidy the layout.
ax.axis('off')
plt.title('Training Locations', fontweight='bold')
plt.tight_layout()

### Exploration 2

In [None]:
# Linear Relations



## Prediction Modelling