# Library of Queries to be used for Analysis

Corre estas queries de forma a obter dados relevantes para análise.

## I. Importar Bibliotecas

In [8]:
# General-purpose libraries
import os
import random
import warnings

import numpy as np
import pandas as pd
# Plotting and mapping libraries
import matplotlib.pyplot as plt
import seaborn as sns
from geopy.geocoders import Nominatim
import folium
from folium.plugins import MarkerCluster
from folium.features import ClickForMarker
from jinja2 import Template
# Other libraries
from matplotlib.cbook import MatplotlibDeprecationWarning
warnings.filterwarnings("ignore", category=MatplotlibDeprecationWarning)



In [12]:
# Load the raw data sets.
# Cities and products are read straight from the project's GitHub repository.
cities_path = 'https://raw.githubusercontent.com/E-man85/projectII/main/rawData/cities.csv'
product_path = 'https://raw.githubusercontent.com/E-man85/projectII/main/rawData/product.csv'
# sales.csv is read from local disk. Set the SALES_CSV_PATH environment variable
# to point at your own copy; when it is not set, the original author's location
# is used so existing setups keep working unchanged.
sales_path = os.environ.get(
    'SALES_CSV_PATH',
    r'C:\Users\Egomes\Desktop\PG_Analytics_Data_science_empresarial\Isla_gaia\14-ProjetoII\projetoII\rawData\sales.csv',
)

raw_cities = pd.read_csv(cities_path)
raw_product = pd.read_csv(product_path)
# The two promo columns are forced to string dtype when parsing.
raw_sales = pd.read_csv(sales_path, dtype={'promo_bin_2': str, 'promo_discount_type_2': str})

## II. Informação Geográfica

#### Adicionar Informação Geográfica das Lojas

In [13]:
# Results already fetched in this kernel session, keyed by the city name passed in.
# Avoids sending the same query to the geocoding service more than once.
_COORDINATE_CACHE = {}


def get_coordinates(city):
    """Geocode a Turkish city name with Nominatim.

    Parameters
    ----------
    city : str
        City name; ", Turkey" is appended before the lookup.

    Returns
    -------
    tuple
        ``(latitude, longitude, district, region)``. ``district`` is the first
        non-empty value among the address fields ``town``, ``suburb``,
        ``city_district`` and ``hamlet``; ``region`` is ``state`` or ``region``.
        Either may be ``None`` when the service does not provide those fields.
        All four values are ``None`` when ``city`` is not a non-blank string
        (e.g. NaN/None coming from an unmapped city code) or when the geocoder
        finds no match.
    """
    not_found = (None, None, None, None)

    # Guard against NaN/None/blank input: concatenating them with ', Turkey'
    # would either raise TypeError or look up the country instead of a city.
    if not isinstance(city, str) or not city.strip():
        return not_found

    if city in _COORDINATE_CACHE:
        return _COORDINATE_CACHE[city]

    geolocator = Nominatim(user_agent="app_lat_long")
    location = geolocator.geocode(city + ', Turkey', addressdetails=True)

    if location:
        address = location.raw.get('address', {})
        district = address.get('town') or address.get('suburb') or address.get('city_district') or address.get('hamlet')
        region = address.get('state') or address.get('region')
        result = (location.latitude, location.longitude, district, region)
    else:
        result = not_found

    # Only completed lookups reach this point; a lookup that raises is not cached.
    _COORDINATE_CACHE[city] = result
    return result

# Maps the city names as spelled in the raw data (some contain "?" or plain
# ASCII where a Turkish letter is expected) to their correct spelling.
city_mapping = {
    'Denizli': 'Denizli',
    'Ankara': 'Ankara',
    '?zmir': 'İzmir',
    'Bursa': 'Bursa',
    'Antalya': 'Antalya',
    'Konya': 'Konya',
    'Adana': 'Adana',
    'Sanl?urfa': 'Şanlıurfa',
    'Gaziantep': 'Gaziantep',
    'Mersin': 'Mersin',
    'Diyarbak?r': 'Diyarbakır',
    'Kayseri': 'Kayseri',
    'Samsun': 'Samsun',
    'Istanbul': 'Istanbul',
    'Eski?ehir': 'Eskişehir',
    'Adapazar?': 'Adapazarı',
    'Kahramanmaras': 'Kahramanmaraş',
    'Erzurum': 'Erzurum',
    'Van': 'Van'
}

# Corrected city name for every store; codes missing from city_mapping become NaN.
raw_cities['city_code_corrected'] = raw_cities['city_code'].map(city_mapping)

# Geocode each distinct city once instead of once per store row: many stores
# share a city, and unmapped (NaN) cities must not be sent to the geocoder.
geo_columns = ['latitude', 'longitude', 'district', 'region']
unique_cities = raw_cities['city_code_corrected'].dropna().unique()
city_geo = pd.DataFrame(
    [get_coordinates(city) for city in unique_cities],
    index=unique_cities,
    columns=geo_columns,
)

# Broadcast the per-city result back onto the store rows. Stores without a
# corrected city name get NaN in all four geographic columns.
store_geo = city_geo.reindex(raw_cities['city_code_corrected'])
store_geo.index = raw_cities.index
raw_cities[geo_columns] = store_geo

raw_cities.to_csv('raw_cities_complete.csv')

raw_cities.head()


Unnamed: 0,store_id,storetype_id,store_size,city_id_old,country_id,city_code,city_code_corrected,latitude,longitude,district,region
0,S0036,ST04,21,C001,TURKEY,Denizli,Denizli,37.783315,29.084483,Merkezefendi,Ege Bölgesi
1,S0005,ST04,19,C001,TURKEY,Denizli,Denizli,37.783315,29.084483,Merkezefendi,Ege Bölgesi
2,S0104,ST04,47,C002,TURKEY,Ankara,Ankara,39.920789,32.854048,Çankaya,İç Anadolu Bölgesi
3,S0068,ST03,14,C003,TURKEY,?zmir,İzmir,38.422455,27.13107,Konak,Ege Bölgesi
4,S0086,ST03,12,C003,TURKEY,?zmir,İzmir,38.422455,27.13107,Konak,Ege Bölgesi


#### Vendas por Localização Geográfica

In [14]:
# Attach every sale to the geographic attributes of its store. The inner join
# keeps only store_id values present in both tables.
view_vendasLocal = raw_cities.merge(raw_sales, on='store_id', how='inner')

In [15]:
# Add the product attributes to the store-level sales view. The inner join
# keeps only product_id values present in both tables.
all_data = view_vendasLocal.merge(raw_product, on='product_id', how='inner')

##### Revenue por Cidade

In [16]:
# Total revenue per city; coordinates are part of the key so they stay
# available as columns in the result.
revenue_per_city = (
    view_vendasLocal
    .groupby(['city_code_corrected', 'latitude', 'longitude'])['revenue']
    .sum()
    .reset_index()
)

In [17]:
# Export the city-level totals to the working directory.
# With pandas' default settings the row index is written as the first column.
revenue_per_city.to_csv(path_or_buf='Revenue por Cidade.csv')

##### Revenue por Distrito

In [18]:
# Total revenue per district; coordinates are part of the key so they stay
# available as columns in the result.
# NOTE(review): groupby drops rows whose key is missing by default, so stores
# without a resolved district do not contribute here - confirm this is intended.
revenue_per_district = (
    view_vendasLocal
    .groupby(['district', 'latitude', 'longitude'])['revenue']
    .sum()
    .reset_index()
)

In [19]:
# Export the district-level totals to the working directory.
# With pandas' default settings the row index is written as the first column.
revenue_per_district.to_csv(path_or_buf='Revenue por Distrito.csv')

##### Revenue por Cidade e Produto

In [20]:
# Total revenue for every (city, product) pair; coordinates are part of the
# key so they stay available as columns in the result.
view_vendasProdutoLocal = (
    all_data
    .groupby(['city_code_corrected', 'product_id', 'latitude', 'longitude'])['revenue']
    .sum()
    .reset_index()
)

In [21]:
# Export the per-city, per-product totals to the working directory.
# With pandas' default settings the row index is written as the first column.
view_vendasProdutoLocal.to_csv(path_or_buf='Vendas por Produto por Cidade.csv')

## III. Análise Gráfica

##### Vendas por Cidade

In [22]:
# Bubble map of revenue per district.
# The placeholder overlay that pointed at 'https://your-image-url.jpg' was
# removed: that URL is not a tile template, so it only generated failing
# requests on top of the default base map.
map_chart = folium.Map(location=[39.9334, 32.8597], zoom_start=6, control_scale=True)

# Revenue range used to scale the bubbles.
min_revenue = revenue_per_district['revenue'].min()
max_revenue = revenue_per_district['revenue'].max()

for district_row in revenue_per_district.itertuples(index=False):
    # Linearly map revenue from [min_revenue, max_revenue] to a radius in [5, 20].
    radius = np.interp(district_row.revenue, (min_revenue, max_revenue), (5, 20))

    # One circle per row of revenue_per_district. The tooltip label text is
    # kept exactly as before, although the value shown is the district name.
    folium.CircleMarker(
        location=[district_row.latitude, district_row.longitude],
        radius=radius,
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.6,
        weight=1,
        tooltip=f"City: {district_row.district}<br>Revenue: {district_row.revenue}"
    ).add_to(map_chart)

# Display the map as the cell output.
map_chart

##### Produto mais vendido por Cidade

In [23]:
# For every city keep the single (city, product) row with the highest revenue.
# idxmax + .loc replaces groupby.apply with a lambda; the city name is kept
# both as index and as column, as before.
top_row_labels = view_vendasProdutoLocal.groupby('city_code_corrected')['revenue'].idxmax()
most_sold = (
    view_vendasProdutoLocal
    .loc[top_row_labels]
    .set_index('city_code_corrected', drop=False)
)

# Map centred on the first city in most_sold. The variable is named
# product_map (not `map`) so the builtin map() is not shadowed in the kernel.
product_map = folium.Map(
    location=[most_sold.iloc[0]['latitude'], most_sold.iloc[0]['longitude']],
    zoom_start=5,
    min_zoom=4,
    max_zoom=10,
)

# One colour per distinct top product, taken from an evenly spaced palette so
# the colours are distinguishable and identical on every run (the previous
# unseeded random colours changed on each execution and could collide).
top_products = list(dict.fromkeys(most_sold['product_id']))
palette = sns.color_palette('husl', len(top_products)).as_hex()
product_colors = dict(zip(top_products, palette))

for _, row in most_sold.iterrows():
    product_id = row['product_id']
    color = product_colors[product_id]

    # Fixed-size marker; the popup shows the product and its revenue.
    folium.CircleMarker(
        location=[row['latitude'], row['longitude']],
        radius=10,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.6,
        popup=f"Product: {product_id}<br>Revenue: {row['revenue']}"
    ).add_to(product_map)

# HTML legend listing each product ID next to its marker colour.
legend_template = Template('''
<div style="position: fixed; bottom: 50px; left: 50px; z-index: 1000; background-color: rgba(255, 255, 255, 0.8);
            padding: 10px; border-radius: 5px; box-shadow: 0 2px 5px rgba(0, 0, 0, 0.3); font-family: Arial, sans-serif;">
    <h4 style="margin-top: 0; margin-bottom: 10px; font-size: 16px;">Legend</h4>
    {% for product, color in product_colors.items() %}
    <div style="display: flex; align-items: center; margin-bottom: 5px;">
        <div style="background-color: {{ color }}; width: 20px; height: 20px; border-radius: 50%; margin-right: 5px;"></div>
        <span style="font-size: 14px;">{{ product }}</span>
    </div>
    {% endfor %}
</div>
''')

# Render the legend with the product -> colour mapping and attach it to the map.
legend_html = legend_template.render(product_colors=product_colors)
product_map.get_root().html.add_child(folium.Element(legend_html))

# Display the map as the cell output (nothing is written to disk here).
product_map