# Data visualization using Pandas and Folium

S.Yu. Papulin (papulin.study@yandex.ru)

### Contents
 - [Preparation](#Preparation)
 - [Basic operations over dataframes](#Basic-operations-over-dataframes)
 - [Applying user-defined function](#Applying-user-defined-function)
 - [Plotting data onto map](#Plotting-data-onto-map)

## Preparation

Import modules

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
%matplotlib inline

In [None]:
# !pip install --upgrade pandas==0.23

**Installing some additional modules if needed**

In [None]:
# Module for processing geodata as DataFrames
# !pip install geopandas==0.8

In [None]:
# import geopandas as gpd
# from shapely.geometry import Point, Polygon, MultiPolygon
# from geopandas.tools import sjoin

In [None]:
# Module for map visualization
!pip install folium

In [None]:
import folium
from folium.plugins import HeatMap

In [None]:
# https://github.com/python-visualization/folium/issues/812
def embed_map(m, path='index.html', width='100%', height='750px'):
    """Save a map to an HTML file and return an IFrame that displays it.

    Args:
        m: Map object exposing ``save(path)`` (e.g. a ``folium.Map``).
        path: File the map is written to. Pass distinct paths when several
            maps must stay visible at once; otherwise every call overwrites
            the same file.
        width: Width passed to the IFrame.
        height: Height passed to the IFrame.

    Returns:
        ``IPython.display.IFrame`` pointing at ``path``.
    """
    from IPython.display import IFrame
    m.save(path)
    return IFrame(path, width=width, height=height)

## Basic operations over dataframes

In [None]:
FILE = "../data/data-4275-2021-02-09.xlsx"

In [None]:
CLMNS = ["Name", "TypeObject", "District", "Address", "SeatsCount", "geoData"]

In [None]:
# Create a dataframe of places
df = pd.read_excel(FILE, index_col=0)
df.head()

In [None]:
# Select only needed columns
df = df[CLMNS]
df.head()

In [None]:
# Alternative way using read_excel
df = pd.read_excel(FILE, 
                   index_col=0, 
                   usecols=["ID",] + CLMNS)  # or you can use indices [0, 1, 5, 7, 8, 10, 14]
df.head()

Print the distinct types of public catering establishments

In [None]:
df["TypeObject"].unique()

Find places on Baumanskaya Street (addresses containing "бауманск"):

In [None]:
# Case-insensitive substring match on the address. na=False makes rows with a
# missing address count as "no match"; without it the mask contains NaN and
# boolean indexing raises.
df_baum = df[df["Address"].str.lower().str.contains("бауманск", na=False)]
df_baum.head()

Print the top 5 places of each type by number of seats, in descending order

In [None]:
# Order all places by seat count (largest first), then keep the first five
# rows encountered for every TypeObject value
(
    df.sort_values("SeatsCount", ascending=False)
    .groupby("TypeObject")
    .head(5)
)

In [None]:
# Grouped output
# Order by type (ascending) and, within a type, by seat count (descending),
# so the five rows kept per type come out grouped together
(
    df.sort_values(["TypeObject", "SeatsCount"], ascending=[True, False])
    .groupby("TypeObject")
    .head(5)
)

Select and filter

In [None]:
CLMNS = ["Name", "Address"]
QUERY = "SeatsCount == 35"

In [None]:
# Option 1: Using loc
df.loc[
    df["SeatsCount"] == 35,  # filter
    CLMNS  # select columns
].head()

In [None]:
# Option 2: Using query
df.query(QUERY)[CLMNS].head()

In [None]:
# Option 3(a): Using pipe
df.query(QUERY)\
    .pipe(lambda df: df[CLMNS])\
    .head()

In [None]:
# Option 3(b): Using pipe
def select_columns(df, clmns):
    """Return the part of ``df`` selected by the column label(s) ``clmns``."""
    subset = df[clmns]
    return subset

def filter_rows(df, condition):
    """Return the rows of ``df`` that satisfy the query expression ``condition``."""
    matching = df.query(condition)
    return matching

In [None]:
# Keep every matching row in df_new; only the displayed preview is limited
# to the first five rows
df_new = df\
    .pipe(filter_rows, condition=QUERY)\
    .pipe(select_columns, clmns=CLMNS)
df_new.head()

In [None]:
# Alternative way
df_new = (
    df
    .pipe(filter_rows, condition=QUERY)
    .pipe(select_columns, clmns=CLMNS)
)
df_new.head()

## Applying user-defined function

### Extracting place locations

Print a single value from the geoData column

In [None]:
df.iloc[0]["geoData"]

In [None]:
import re
import json

In [None]:
def extract_coordinates(geo_data):
    """Parse a geoData value and return its coordinates.

    The ``key=value, key=`` prefix of the text is rewritten into JSON syntax
    and the result is decoded with ``json.loads`` (the input presumably looks
    like ``{type=Point, coordinates=[...]}`` -- confirm against the data).

    Args:
        geo_data: Raw text of a geoData cell.

    Returns:
        The value stored under ``"coordinates"``, or ``None`` when the input
        is not a string (e.g. a missing cell), is not valid JSON after the
        rewrite, or has no ``"coordinates"`` entry.
    """
    try:
        # Raw pattern string: "\w" inside a plain literal is an invalid escape
        geo_json = re.sub(r"(\w+)=(\w+), (\w+)=", r'"\1": "\2", "\3": ', geo_data)
        return json.loads(geo_json)["coordinates"]
    except (TypeError, ValueError, KeyError):
        # TypeError: non-string input; ValueError: covers json.JSONDecodeError;
        # KeyError: decoded object lacks "coordinates"
        return None

In [None]:
# Create a new dataframe with Lng, Lat columns (in that order).
# Each row's geoData is parsed by extract_coordinates and the resulting pair
# is labelled Lng, Lat -- presumably the GeoJSON [longitude, latitude] order;
# confirm against the data.
df_coords = df.apply(
    lambda x: pd.Series(
        extract_coordinates(x["geoData"]), 
        index=["Lng", "Lat"]
    ),
    axis=1, 
    result_type="expand"
)

# Preview the first rows
df_coords.head()

In [None]:
# Option 1: Merge the initial dataframe and the dataframe with coordinates
df_ext = df.merge(df_coords, left_index=True, right_index=True)
df_ext.head()

In [None]:
# Option 2: Concatenate the initial dataframe and the dataframe with coordinates
df_ext = pd.concat([df, df_coords], axis=1, sort=False)
df_ext.head()

### Calculating distance

Find your location from your IP address

In [None]:
import requests

In [None]:
# URL for getting your public IP
IP_SERVICE_URL = "https://api.myip.com"

# URL for getting your location by your IP
LOCATION_SERVICE_URL = "http://ip-api.com/json/{}"

Getting your location by IP

In [None]:
# Request your public IP
# The timeout keeps the cell from hanging on an unresponsive service, and
# raise_for_status() reports HTTP errors at the point of failure
ip__response = requests.get(IP_SERVICE_URL, timeout=10)
ip__response.raise_for_status()
ip__response__data = ip__response.json()
ip = ip__response__data["ip"]


# Request your location
location__response = requests.get(LOCATION_SERVICE_URL.format(ip), timeout=10)
location__response.raise_for_status()
location__response__data = location__response.json()

# Fail with the service's own answer when no coordinates came back,
# rather than with a bare KeyError
if "lat" not in location__response__data or "lon" not in location__response__data:
    raise RuntimeError(
        "Location lookup returned no coordinates: {}".format(location__response__data)
    )

# Extract lat and lng
location = location__response__data["lat"], location__response__data["lon"]
location

Computing distances between your location and places

In [None]:
def calculate_haversine(*coords, radius=6371.0):
    """
    Calculate the haversine (great-circle) distance between two points.

    Note: (start lat, start lng, end lat, end lng), all in degrees.

    Args:
        *coords: The four coordinates listed above.
        radius: Radius of the sphere; the result is expressed in the same
            unit. The default 6371.0 is the Earth's mean radius in
            kilometres, so the default result is in kilometres.

    Returns:
        The distance, or ``None`` when the number of coordinates is not four.
    """
    if len(coords) != 4:
        return
    lat1, lng1, lat2, lng2 = np.radians(coords)
    hav = (
        np.sin((lat2 - lat1) / 2.0) ** 2
        + np.cos(lat1) * np.cos(lat2) * np.sin((lng2 - lng1) / 2.0) ** 2
    )
    # Floating-point rounding can leave hav marginally above 1 for
    # near-antipodal points, which would make arcsin return NaN
    return 2 * radius * np.arcsin(np.sqrt(np.minimum(hav, 1.0)))

In [None]:
# Distance from your location (lat, lng) to every place, computed row by row
# and returned as a Series named "distance"
s_distance = df_ext.apply(
    lambda place: calculate_haversine(*location, place["Lat"], place["Lng"]),
    axis=1,
).rename("distance")

In [None]:
# Add the distance column
df_ext_ = df_ext.merge(s_distance, left_index=True, right_index=True)
df_ext_.head()

In [None]:
# df_ext_ = pd.concat([df_ext, s_distance], axis=1, sort=False)
# df_ext_.head()

In [None]:
# Places within 3km radius
df_less_3km = df_ext_.query("distance < 3")
df_less_3km.head()

In [None]:
# 5 nearest places to your location
df_less_3km\
    .sort_values("distance")\
    .head(5)

## Plotting data onto map

In [None]:
# Columns to use
CLMNS = ["Name", "Lat", "Lng"]

In [None]:
df_coords = df_ext[CLMNS]
df_coords.head()

Plotting city zones

In [None]:
# Path where city geojson is located
MOSCOW_ZONES_PATH = "../data/mo.geojson"

In [None]:
# JSON text is UTF-8 by specification; naming the encoding avoids decode
# errors on platforms whose default encoding differs
with open(MOSCOW_ZONES_PATH, encoding="utf-8") as f:
    zones_geojson = json.load(f)

In [None]:
# Single zone
zones_geojson["features"][0]

In [None]:
# Create an instance of map
m = folium.Map()

In [None]:
# Plot zones onto map
def style_function(feature):
    """Return the outline style used for every zone, whatever the feature."""
    return {"color": "orange", "weight": 1}


# Add the zone layer, zoom the map to everything on it, then render it
folium.GeoJson(
    MOSCOW_ZONES_PATH,
    name="geojson",
    style_function=style_function,
).add_to(m)
m.fit_bounds(m.get_bounds())
embed_map(m)

In [None]:
# Create markers for places
# Rows without coordinates cannot be drawn, so drop them explicitly instead
# of relying on a catch-all except to hide them
for place in df_coords.dropna(subset=["Lat", "Lng"]).itertuples(index=False):
    try:
        folium.Circle(
            radius=5,
            location=[place.Lat, place.Lng],
            popup=place.Name,
            color="red",
            fill=True,
            fill_opacity=1.0
        ).add_to(m)
    except (TypeError, ValueError):
        # Best effort: skip a place whose coordinates are rejected as invalid
        continue

In [None]:
# Create a marker for your location
folium.Marker(location, icon=folium.Icon(color="darkblue", 
                                         icon_color="white", 
                                         prefix="fa", 
                                         icon="user")).add_to(m)

In [None]:
# Plot map with all elements
embed_map(m)