In [1]:
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import random
import seaborn as sns
import pandas as pd
import collections

With the provided data sets, we will investigate one of these directions:

- Analysis and prediction of customer satisfaction

-  Future sales predictions

- Identification of the most valuable product categories

- Customer value segmentation

- Close deal prediction

In [27]:
# Relative directory (with trailing slash) that holds the Olist CSV files;
# file names are appended to it by plain string concatenation.
folder = "Data/E-Commerce/"

## 1. Geolocation

### 1.2 Customer demographics

In [218]:
# Load the raw geolocation and customer tables from the data folder.
geolocation, customers = (
    pd.read_csv(folder + csv_name)
    for csv_name in ('olist_geolocation_dataset.csv',
                     'olist_customers_dataset.csv')
)

In [219]:
# Show the column names of both tables to find the join keys.
(geolocation.columns, customers.columns)

(Index(['geolocation_zip_code_prefix', 'geolocation_lat', 'geolocation_lng',
        'geolocation_city', 'geolocation_state'],
       dtype='object'),
 Index(['customer_id', 'customer_unique_id', 'customer_zip_code_prefix',
        'customer_city', 'customer_state'],
       dtype='object'))

In [220]:
# Remove outliers: keep only points inside Brazil's bounding box (bounds inclusive).
#   northernmost point:  5 deg 16' 27.8"  N  ->   5.27438888
#   southernmost point: 33 deg 45' 04.21" S  -> -33.75116944
#   westernmost point:  73 deg 58' 58.19" W  -> -73.98283055
#   easternmost point:  34 deg 47' 35.33" W  -> -34.79314722
geolocation = geolocation[
    geolocation.geolocation_lat.between(-33.75116944, 5.27438888)
    & geolocation.geolocation_lng.between(-73.98283055, -34.79314722)
]


In [221]:
# Inner-join customers to geolocation rows on the zip-code prefix. A customer
# appears once per geolocation row sharing its prefix, so this frame can be
# much larger than `customers`. Only one zip-code column is kept, under a
# neutral name.
customers_location = (
    customers
    .merge(geolocation,
           left_on='customer_zip_code_prefix',
           right_on='geolocation_zip_code_prefix')
    .drop(columns='customer_zip_code_prefix')
    .rename(columns={'geolocation_zip_code_prefix': 'zip_code_prefix'})
)

In [222]:
# Collapse to one row per customer_id, keeping the first matched geolocation row.
orders_location = customers_location.drop_duplicates(subset=['customer_id'], keep='first')

In [223]:
# Preview the first five de-duplicated rows.
orders_location.head(5)

Unnamed: 0,customer_id,customer_unique_id,customer_city,customer_state,zip_code_prefix,geolocation_lat,geolocation_lng,geolocation_city,geolocation_state
0,06b8999e2fba1a1fbc88172c00ba8bc7,861eff4711a542e4b93843c6dd7febb0,franca,SP,14409,-20.509897,-47.397866,franca,SP
147,5dca924cc99eea2dc5ba40d11ec5dd0f,2761fee7f378f0a8d7682d8a3fa07ab1,franca,SP,14409,-20.509897,-47.397866,franca,SP
294,661897d4968f1b59bfff74c7eb2eb4fc,d06a495406b79cb8203ea21cc0942f8c,franca,SP,14409,-20.509897,-47.397866,franca,SP
441,702b62324327ccba20f1be3465426437,8b3d988f330c1d1c3332ccd440c147b7,franca,SP,14409,-20.509897,-47.397866,franca,SP
588,bdf997bae7ca819b0415f5174d6b4302,866755e25db620f8d7e81b351a15bb2f,franca,SP,14409,-20.509897,-47.397866,franca,SP


In [224]:
# Keep only the location-describing columns and renumber the rows from 0.
locations = (
    orders_location
    .loc[:, ['zip_code_prefix', 'geolocation_lat', 'geolocation_lng',
             'geolocation_city', 'geolocation_state']]
    .reset_index(drop=True)
)

In [225]:
# Count how many rows of `orders_location` (one per customer_id) fall on each
# distinct (zip prefix, lat, lng, city, state) combination.
#
# The aggregation reads from `orders_location` rather than from `locations`
# itself, so the cell is idempotent: grouping the already aggregated
# `locations` again would silently turn every count into 1, or raise KeyError
# once the state/zip columns have been dropped further down.
# `reset_index(name='counts')` already yields a fresh 0..n-1 index, so no
# second reset is needed.
locations = (
    orders_location
    .groupby(['zip_code_prefix', 'geolocation_lat', 'geolocation_lng',
              'geolocation_city', 'geolocation_state'])
    .size()
    .reset_index(name='counts')
)

In [226]:
import folium

In [227]:
# Drop the columns the map does not need. `errors='ignore'` keeps the cell
# re-runnable: on a second execution the columns are already gone, and a plain
# drop would raise KeyError.
locations = (
    locations
    .drop(columns=['geolocation_state', 'zip_code_prefix'], errors='ignore')
    .reset_index(drop=True)
)

In [228]:
# Preview the first five aggregated locations.
locations.head(5)

Unnamed: 0,geolocation_lat,geolocation_lng,geolocation_city,counts
0,-23.549032,-46.635313,sao paulo,1
1,-23.550116,-46.635122,sao paulo,2
2,-23.549819,-46.635606,sao paulo,6
3,-23.550524,-46.636694,sao paulo,2
4,-23.550393,-46.637302,sao paulo,4


In [248]:
from folium.plugins import FastMarkerCluster
from folium.plugins import MarkerCluster

In [251]:
# Base map centred on Brazil. The min/max lat/lon values are the same bounding
# box used to filter the geolocation outliers. folium only applies them when
# `max_bounds=True`; without that flag they are ignored and the map can be
# panned anywhere.
m = folium.Map([-14.235, -51.925],
               zoom_start=4,
               min_lat=-33.75116944,
               max_lat=5.27438888,
               min_lon=-73.98283055,
               max_lon=-34.79314722,
               max_bounds=True)

# JavaScript executed by FastMarkerCluster for every data row
# (row[0] = latitude, row[1] = longitude); it builds a red AwesomeMarkers pin.
callback = """\
function (row) {
    var icon, marker;
    icon = L.AwesomeMarkers.icon({
        icon: "map-marker", markerColor: "red"});
    marker = L.marker(new L.LatLng(row[0], row[1]));
    marker.setIcon(icon);
    return marker;
};
"""

# One marker per distinct coordinate pair. The `counts` column is not passed
# to the cluster, so cluster sizes reflect distinct points, not customers.
marker_points = locations[['geolocation_lat', 'geolocation_lng']].values.tolist()
m.add_child(FastMarkerCluster(marker_points, callback=callback))

# Render the map as the cell output.
m