In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
from pyspark.sql import SparkSession
from matplotlib.lines import Line2D
from DataAnalyzer import DataAnalyzer
# Reuse the active Spark session if one exists, otherwise start one named "DataAnalyzing".
spark = (
    SparkSession.builder
    .appName("DataAnalyzing")
    .getOrCreate()
)

In [None]:
# Build the project-local DataAnalyzer around the shared Spark session.
my_analyzer = DataAnalyzer(spark)
# Presumably reads the input datasets into the analyzer — verify against DataAnalyzer.load_data.
my_analyzer.load_data()

Set up the SQL database for the alerting system.

In [None]:
# Create the database only when it is missing (IF NOT EXISTS), so re-running this cell is harmless.
# NOTE(review): `orders.analyzer` is a dotted, two-part name — confirm that Spark resolves it as
# intended (e.g. that a catalog named `orders` is configured) rather than failing to parse/resolve.
spark.sql("CREATE DATABASE IF NOT EXISTS orders.analyzer")

In [None]:
# Run the analysis. The plotting cells below read fraud_df, segment_df, cities_per_orders,
# countries_per_amount and amount_per_payment_method from the analyzer — presumably populated
# by this call; verify against DataAnalyzer.analyze.
my_analyzer.analyze()

Visualizations:

Suspected fraud:

In [None]:
# Heatmap of customers suspected of fraud: one row per day, one column per hour.
# The whole plot is skipped when the analyzer flagged nothing, since there is nothing to pivot.
if not my_analyzer.fraud_df.empty:
    # DataFrame.pivot only accepts keyword arguments on pandas >= 2.0; the positional form
    # raises TypeError there. Keywords also work on older pandas releases.
    fraud_by_day_hour = my_analyzer.fraud_df.pivot(
        index='window_day',
        columns='window_hour',
        values='number_of_customers',
    )

    plt.figure(figsize=(12, 8))
    # fmt='.0f' prints the annotated counts without decimals.
    sns.heatmap(fraud_by_day_hour, annot=True, fmt='.0f', cmap='YlGnBu')
    plt.title('Number of Customers suspected of fraud per hour')
    # Explicit labels replace the raw column names seaborn would otherwise show.
    plt.xlabel('Hour')
    plt.ylabel('Day')
    plt.xticks(rotation=45)
    plt.yticks(rotation=0)
    plt.show()

Clients per segment:

In [None]:
# Pie chart of how many clients fall into each segment.
segments = my_analyzer.segment_df

fig, ax = plt.subplots(figsize=(8, 8))
ax.pie(
    segments['count'],
    labels=segments['segment'],
    autopct='%1.1f%%',  # show each slice's share with one decimal
    startangle=140,
)
ax.set_title('Count per Group Segment')
plt.show()

Orders per city:

In [None]:
# Bar chart of the per-city counts held in the analyzer's cities_per_orders frame.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    data=my_analyzer.cities_per_orders,
    x='city',
    y='count',
    palette='viridis',
    ax=ax,
)
ax.set_title('Customer Count per City')
ax.set_xlabel('City')
ax.set_ylabel('Count')
# Tilt the city names so long labels do not overlap.
plt.xticks(rotation=45)
plt.show()

Amount per country:

In [None]:
# World choropleth of the summed order amount per country.
# NOTE(review): `gpd.datasets.get_path` was deprecated in GeoPandas 0.14 and removed in 1.0 —
# confirm the pinned GeoPandas version, or switch to a locally stored Natural Earth file.
world_shapes_path = gpd.datasets.get_path('naturalearth_lowres')
world = gpd.read_file(world_shapes_path)

# Left join: every country polygon is kept, and countries absent from the
# analyzer's data simply carry no amount.
merged = world.merge(
    my_analyzer.countries_per_amount,
    how='left',
    left_on='name',
    right_on='country',
)

fig, ax = plt.subplots(figsize=(10, 6))
merged.plot(
    column='sum(totalAmountUSD)',
    cmap='OrRd',
    linewidth=0.8,
    edgecolor='0.8',
    legend=True,
    ax=ax,
)

# A map needs no axis frame or ticks.
ax.set_title('Amount by Country')
ax.set_axis_off()

plt.show()

Amounts per payment method:

In [None]:
# Preview the first rows of the per-payment-method amounts before they are pivoted for plotting.
my_analyzer.amount_per_payment_method.head()

In [1]:
# Stacked bars of the daily total amount, split by payment method.
# Requires `my_analyzer` from the setup cells at the top of the notebook — run those first.

# Reshape to one row per date and one column per payment method.
pivot_df = my_analyzer.amount_per_payment_method.pivot(
    index='my_date',
    columns='paymentMethod',
    values='sum(totalAmountUSD)',
)

amount_ax = pivot_df.plot.bar(stacked=True, figsize=(10, 6))
amount_ax.set_title('Total Amount per Day per Payment Method')
amount_ax.set_xlabel('Date')
amount_ax.set_ylabel('Total Amount (USD)')
# Tilt the date labels and tighten the layout so they are not clipped.
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

NameError: name 'my_analyzer' is not defined