# Setup

In [3]:
import pandas as pd
pd.options.plotting.backend = "plotly"
import plotly.express as px
import ipywidgets as widgets
from ipywidgets import interactive
import os

# Own functions
%load_ext autoreload
%autoreload 2

from models.helpers import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load data preprocessing

In [4]:
data_path = "data/data_processing.csv"
# Check if existing data
if os.path.exists(data_path):

    print("Exist data_processing.csv")
    printy("Start load")
    data = pd.read_csv(data_path, sep = "|")
    printg("Finish load")

else:
    raise Exception(f"{data_path} does not exist. First run data_pipeline.ipynb")

print("\t Total elements", len(data))

Exist data_processing.csv
[93mStart load[0m
[92mFinish load[0m
	 Total elements 164


In [5]:
data

Unnamed: 0,address,country,createdate,firstname,hs_object_id,industry,lastmodifieddate,lastname,phone,raw_email,technical_test___create_date,contry_found,city_found,email,fix_phone,names
0,"Blackpool Rue, 6576",Waterford,2023-05-15 02:39:02.002,Zoe,416102,Poultry and fish,2023-09-16T10:56:44.913Z,Owen,0-774-386-624,Zoe <zoe_owen450104633@acrit.org> Contact Info.,2021-07-13,Ireland,Waterford,zoe_owen450104633@acrit.org,(+353) 774386624,Zoe Owen
1,"Parkfield Avenue, 5340",Ireland,2023-05-15 02:39:02.003,Zara,413403,Fruit and vegetables,2023-09-16T10:55:08.803Z,Rodwell,6-777-367-783,Zara <zara_rodwell1398442854@nickia.com> Conta...,2021-01-09,Ireland,,zara_rodwell1398442854@nickia.com,(+353) 6777367783,Zara Rodwell
2,"Abourne Lane, 876",Ireland,2023-05-15 02:39:02.003,Zara,417951,Milling,2023-09-16T10:53:14.079Z,Freeburn,5-618-556-540,Zara <zara_freeburn1593147546@gmail.com> Conta...,2021-08-30,Ireland,,zara_freeburn1593147546@gmail.com,(+353) 5618556540,Zara Freeburn
3,"Chester Crossroad, 7070",Dublin,2023-05-15 02:39:02.003,Winnie,419852,Dairy products,2023-09-16T10:55:08.759Z,Walter,1-161-604-327,Winnie <winnie_walter538064895@sheye.org> Cont...,2021-02-10,Ireland,Dublin,winnie_walter538064895@sheye.org,(+353) 1161604327,Winnie Walter
4,"Tilloch Crossroad, 8332",Dublin,2023-05-15 02:39:02.003,Zoe,425352,Meat,2023-09-16T10:55:09.707Z,Owen,5-645-416-200,Zoe <zoe_owen1652446013@bungar.biz> Contact Info.,2021-11-02,Ireland,Dublin,zoe_owen1652446013@bungar.biz,(+353) 5645416200,Zoe Owen
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159,"Ashley Grove, 2963",Plymouth,2023-05-15 02:39:02.023,Tyson,430901,Animal feeds,2023-09-16T10:57:49.050Z,Kelly,1-881-672-830,Tyson <tyson_kelly212146485@eirey.tech> Contac...,2021-06-28,United Kingdom,Plymouth,tyson_kelly212146485@eirey.tech,(+44) 1881672830,Tyson Kelly
160,"Linden Route, 4037",England,2023-05-15 02:39:02.023,Tyson,431151,Animal feeds,2023-09-16T10:57:31.748Z,Kidd,6-142-208-418,Tyson <tyson_kidd2102553002@zorer.org> Contact...,2021-01-06,United Kingdom,England,tyson_kidd2102553002@zorer.org,(+44) 6142208418,Tyson Kidd
161,"Caroline Vale, 3046",London,2023-05-15 02:39:02.023,Tyson,431902,Fruit and vegetables,2023-09-16T10:57:23.082Z,Hilton,3-557-764-214,Tyson <tyson_hilton1508143043@liret.org> Conta...,2021-05-17,United Kingdom,London,tyson_hilton1508143043@liret.org,(+44) 3557764214,Tyson Hilton
162,"Aspen Boulevard, 6813",Dublin,2023-05-15 02:39:02.023,Tyson,440351,Milling,2023-09-16T10:57:31.748Z,Hilton,8-387-472-861,Tyson <tyson_hilton2096701114@eirey.tech> Cont...,2021-10-29,Ireland,Dublin,tyson_hilton2096701114@eirey.tech,(+353) 8387472861,Tyson Hilton


# Interactive graphs

## Compare industry by country

In [6]:
df_counts = data.groupby(["contry_found","industry"])["industry"].value_counts().reset_index()

fig = px.bar(df_counts, x="industry", y="count", color="contry_found",
             labels={"industry": "Industry", "count": "Count", "contry_found": "Country"},
             title="Count of Industries by Country",
             barmode="group")

# Mostrar el gráfico
fig.show()

## Compare distribution city by pais 

In [7]:
df_counts = data.groupby(["contry_found","city_found"])["city_found"].value_counts().reset_index()

fig = px.pie(df_counts, values="count", names="city_found", color="contry_found",
             labels={"city_found": "City", "count": "Count", "contry_found": "Country"},
             title="Count of City by Country")

fig.show()

## All in same window

In [8]:
def plot_graph(graph_type):
    if graph_type == 'Industries by Country':
        df_counts = data.groupby(["contry_found", "industry"])["industry"].value_counts().reset_index()
        fig = px.bar(df_counts, x="industry", y="count", color="contry_found",
                     labels={"industry": "Industry", "count": "Count", "contry_found": "Country"},
                     title="Count of Industries by Country",
                     barmode="group")
    elif graph_type == 'City by Country':
        df_counts = data.groupby(["contry_found", "city_found"])["city_found"].value_counts().reset_index()
        fig = px.pie(df_counts, values="count", names="city_found", color="contry_found",
                     labels={"city_found": "City", "count": "Count", "contry_found": "Country"},
                     title="Count of City by Country")
    else:
        fig = None

    if fig:
        fig.show()

In [10]:
graph_type_dropdown = widgets.Dropdown(
    options=["Select","Industries by Country", "City by Country"],
    value="Select",
    description="Graph:"
)

interactive_plot = interactive(plot_graph, graph_type=graph_type_dropdown)

interactive_plot

interactive(children=(Dropdown(description='Graph:', options=('Select', 'Industries by Country', 'City by Coun…