# 📊 Interactive EDA with Plotly

Explore escort profile data using interactive visualizations. Each chart is written to a standalone HTML file under `outputs/` rather than rendered inline.

In [32]:
from pathlib import Path

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Load the processed profile export. `phone` is read as text so the values are
# treated as identifiers: a numeric parse turns them into floats, which cannot
# keep a leading zero.
# NOTE(review): this only helps if the CSV still stores the digits as written;
# confirm against the step that produces the file.
df = pd.read_csv("../data/processed/all_profiles.csv", dtype={"phone": str})

# Normalise empty strings to missing values. Reassigning (instead of
# inplace=True) keeps the cell safe to re-run and easy to chain.
df = df.replace("", pd.NA)

# Character counts of the free-text fields. `.str.len()` leaves missing text
# as NaN, whereas `astype(str).apply(len)` measured the string form of the
# missing marker and reported a bogus small length for those rows.
df["desc_length"] = df["description"].str.len()
df["title_length"] = df["title"].str.len()
df.head()

Unnamed: 0,title,description,profile_url,phone,age,location,image_url,category,source,desc_length,title_length
0,FUN HOTT Aussie Chick CJ keen 2 find some new ...,"Hey, I m Chloe Janee I m 18 recentlyt just sta...",https://www.locanto.com.au/brisbane/ID_7554557...,,18.0,,,femaleEscort,,297,60
1,Quickly Blow And Go Service,"Text me 0451885118 Hi guy, I am a good sucker ...",https://www.locanto.com.au/vic/ID_7570504461/Q...,451885118.0,20.0,,,femaleEscort,,283,27
2,Vermont New Arrived! Beautiful face Sexy figur...,Hi Boys I m Hannah. A Model good looking girl....,https://www.locanto.com.au/vic/ID_7569608635/V...,,,,,femaleEscort,,301,64
3,Outdoor fun anyone?,Looking to meet someone. Find what you’re look...,https://www.locanto.com.au/g/dol/signup/?utm_s...,,,,,femaleEscort,,57,19
4,INDIAN PUNJABI COLLEGE GIRL AVAILABLE FOR YOU,Text me on WhatsApp 61489043930 Text me on Wha...,https://www.locanto.com.au/melbourne/ID_756891...,,,,,femaleEscort,,63,45


## 📍 Location Distribution

In [33]:
# Tally ads per location string and keep the 20 most frequent.
# pandas and plotly are already imported in the notebook's first code cell.
top_locs = df["location"].value_counts().head(20).reset_index()
top_locs.columns = ["location", "count"]

fig = px.bar(top_locs, x="location", y="count", title="Top 20 Locations")

# write_html does not create missing folders, so make sure the target
# directory exists first; otherwise a fresh checkout fails on this cell.
out_file = "outputs/top_20_locations.html"
Path(out_file).parent.mkdir(parents=True, exist_ok=True)
fig.write_html(out_file)

print(f"✅ Saved bar chart to {out_file}")


✅ Saved bar chart to outputs/top_20_locations.html


## 🏷️ Category Counts

In [34]:
# Number of ads in each category.
# pandas and plotly are already imported in the notebook's first code cell.
cat_counts = df["category"].value_counts().reset_index()
cat_counts.columns = ["category", "count"]

# One bar per category, tallest first (value_counts sorts descending).
fig = px.bar(cat_counts, x="category", y="count", title="Ad Count by Category")

# write_html does not create missing folders, so make sure the target
# directory exists first; otherwise a fresh checkout fails on this cell.
out_file = "outputs/category_counts.html"
Path(out_file).parent.mkdir(parents=True, exist_ok=True)
fig.write_html(out_file)

print(f"✅ Saved chart to {out_file}")

✅ Saved chart to outputs/category_counts.html


## 🎂 Age Distribution

In [39]:
# Histogram of advertised ages; rows without an age are left out.
age_known = df.dropna(subset=["age"])
fig = px.histogram(age_known, x="age", nbins=20, title="Age Distribution")

# write_html does not create missing folders, so make sure the target
# directory exists first; otherwise a fresh checkout fails on this cell.
out_file = "outputs/age_distribution.html"
Path(out_file).parent.mkdir(parents=True, exist_ok=True)
fig.write_html(out_file)

print(f"✅ Saved histogram to {out_file}")

✅ Saved histogram to outputs/age_distribution.html


## ✏️ Description Length by Category

In [43]:
# Rows usable for the box plot: both the length and the category must be
# present. The filtered rows get their own name so the shared `df` is not
# overwritten; later cells keep seeing the full data set and re-running this
# cell has no side effects on them.
desc_by_category = df.dropna(subset=["desc_length", "category"])

fig = px.box(
    desc_by_category,
    x="category",
    y="desc_length",
    title="Description Length by Category",
)

# Saved to HTML instead of being displayed inline. write_html does not create
# missing folders, so make sure the target directory exists first.
out_file = "outputs/desc_length_by_category.html"
Path(out_file).parent.mkdir(parents=True, exist_ok=True)
fig.write_html(out_file)

print(f"✅ Box plot saved to {out_file}")


✅ Box plot saved to outputs/desc_length_by_category.html


## ☎️ Most Reused Phone Numbers

In [46]:
# The 15 phone numbers that appear on the most ads (missing phones are not
# counted by value_counts).
top_phones = df["phone"].value_counts().head(15).reset_index()
top_phones.columns = ["phone", "count"]

fig = px.bar(
    top_phones,
    x="phone",
    y="count",
    title="Top 15 Most Reused Phone Numbers",
)
# Phone numbers are labels, not quantities. Force a categorical axis so each
# number gets its own evenly spaced bar instead of being positioned by its
# numeric value on a continuous scale.
fig.update_xaxes(type="category")

# write_html does not create missing folders, so make sure the target
# directory exists first; otherwise a fresh checkout fails on this cell.
out_file = "outputs/top_reused_phones.html"
Path(out_file).parent.mkdir(parents=True, exist_ok=True)
fig.write_html(out_file)

print(f"✅ Bar chart saved to {out_file}")


✅ Bar chart saved to outputs/top_reused_phones.html


## 📍 Geographic Keywords from Location

In [47]:
# Fold case first so spellings that differ only in capitalisation are tallied
# together, then show the 15 most frequent location strings.
location_lower = df["location"].str.lower()
location_lower.value_counts().iloc[:15]

location
melbourne                               129
melbourne cbd                             9
brisbane                                  8
sydney                                    6
brunswick                                 3
richmond                                  2
cbd                                       2
surry hills                               1
melbourne cbd/ inner & outer suburbs      1
perth                                     1
geelong                                   1
ballarat                                  1
online only                               1
brisbane or melbourne                     1
balaclava                                 1
Name: count, dtype: int64