In [1]:
# Import the required modules
import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np

# visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Machine Learning
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

# Metrics
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, roc_curve, RocCurveDisplay

# Preprocessing
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder, OrdinalEncoder
from sklearn.model_selection import train_test_split

# suppress warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv("dream_market_cocaine_listings.csv")
print(df.shape)
df.info()

(1504, 64)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1504 entries, 0 to 1503
Data columns (total 64 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Unnamed: 0               1504 non-null   int64  
 1   product_title            1504 non-null   object 
 2   ships_from_to            1504 non-null   object 
 3   grams                    1504 non-null   float64
 4   quality                  1504 non-null   float64
 5   btc_price                1504 non-null   float64
 6   cost_per_gram            1504 non-null   float64
 7   cost_per_gram_pure       1504 non-null   float64
 8   escrow                   1504 non-null   int64  
 9   product_link             1504 non-null   object 
 10  vendor_link              1504 non-null   object 
 11  vendor_name              1504 non-null   object 
 12  successful_transactions  1504 non-null   int64  
 13  rating                   1504 non-null   float64
 14  ships_from   

In [3]:
#Drop Blank, product link, vendor link Columns
df = df.drop(columns=['Unnamed: 0', 'vendor_link', 'product_link', 'escrow'])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1504 entries, 0 to 1503
Data columns (total 60 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   product_title            1504 non-null   object 
 1   ships_from_to            1504 non-null   object 
 2   grams                    1504 non-null   float64
 3   quality                  1504 non-null   float64
 4   btc_price                1504 non-null   float64
 5   cost_per_gram            1504 non-null   float64
 6   cost_per_gram_pure       1504 non-null   float64
 7   vendor_name              1504 non-null   object 
 8   successful_transactions  1504 non-null   int64  
 9   rating                   1504 non-null   float64
 10  ships_from               1504 non-null   object 
 11  ships_to                 1504 non-null   object 
 12  ships_to_US              1504 non-null   bool   
 13  ships_from_US            1504 non-null   bool   
 14  ships_to_NL             

In [4]:
df.head()

Unnamed: 0,product_title,ships_from_to,grams,quality,btc_price,cost_per_gram,cost_per_gram_pure,vendor_name,successful_transactions,rating,ships_from,ships_to,ships_to_US,ships_from_US,ships_to_NL,ships_from_NL,ships_to_FR,ships_from_FR,ships_to_GB,ships_from_GB,ships_to_CA,ships_from_CA,ships_to_DE,ships_from_DE,ships_to_AU,ships_from_AU,ships_to_EU,ships_from_EU,ships_to_ES,ships_from_ES,ships_to_N. America,ships_from_N. America,ships_to_BE,ships_from_BE,ships_to_WW,ships_from_WW,ships_to_SI,ships_from_SI,ships_to_IT,ships_from_IT,ships_to_DK,ships_from_DK,ships_to_S. America,ships_from_S. America,ships_to_CH,ships_from_CH,ships_to_BR,ships_from_BR,ships_to_CZ,ships_from_CZ,ships_to_SE,ships_from_SE,ships_to_CO,ships_from_CO,ships_to_CN,ships_from_CN,ships_to_PL,ships_from_PL,ships_to_GR,ships_from_GR
0,!!!!!INTRO OFFER!!!!! 1GR COCAINE 90%,NL → EU,1.0,90.0,0.02577,0.02577,0.028633,Mister-Molly,90,4.63,NL,EU,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,!!!!!INTRO OFFER!!!!! 2GR COCAINE 90%,NL → EU,2.0,90.0,0.0515,0.02575,0.028611,Mister-Molly,90,4.63,NL,EU,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,!!!INTRO!!! 0.5G COCAINE 89% - STRAIGHT FROM T...,NL → EU,0.5,89.0,0.01649,0.03298,0.037056,0ldamsterdamm,620,4.94,NL,EU,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,!1G! C O L O M B I A N C O C A I N E - 89% PURITY,FR → EU,1.0,89.0,0.0412,0.0412,0.046292,lhomme-masquer,15,5.0,FR,EU,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,** 1 Gram 87% Pure Uncut Colombian Cocaine **,NL → WW,1.0,87.0,0.034,0.034,0.03908,SMOOTHCRIMINAL007,28,4.78,NL,WW,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [5]:
#Check for duplicates
df.duplicated().sum()

0

In [6]:
#Check for missing values
df.isnull().sum()

product_title              0
ships_from_to              0
grams                      0
quality                    0
btc_price                  0
cost_per_gram              0
cost_per_gram_pure         0
vendor_name                0
successful_transactions    0
rating                     0
ships_from                 0
ships_to                   0
ships_to_US                0
ships_from_US              0
ships_to_NL                0
ships_from_NL              0
ships_to_FR                0
ships_from_FR              0
ships_to_GB                0
ships_from_GB              0
ships_to_CA                0
ships_from_CA              0
ships_to_DE                0
ships_from_DE              0
ships_to_AU                0
ships_from_AU              0
ships_to_EU                0
ships_from_EU              0
ships_to_ES                0
ships_from_ES              0
ships_to_N. America        0
ships_from_N. America      0
ships_to_BE                0
ships_from_BE              0
ships_to_WW   

In [7]:
#Convert BTC to USD
btc_to_usd_rate = 8637813  # Set a fixed rate (adjust as needed)
df['usd_price'] = df['btc_price'] * btc_to_usd_rate

In [8]:
df[['btc_price', 'usd_price']].head()

Unnamed: 0,btc_price,usd_price
0,0.02577,222596.44101
1,0.0515,444847.3695
2,0.01649,142437.53637
3,0.0412,355877.8956
4,0.034,293685.642


In [9]:
#Analyze Vendor Performance
low_trans = df['successful_transactions'].quantile(0.33)
high_trans = df['successful_transactions'].quantile(0.66)

low_rating = df['rating'].quantile(0.33)
high_rating = df['rating'].quantile(0.66)

In [10]:
def categorize_performance(row):
    if row['successful_transactions'] <= low_trans and row['rating'] <= low_rating:
        return 'Low'
    elif row['successful_transactions'] > high_trans and row['rating'] > high_rating:
        return 'High'
    else:
        return 'Medium'

df['vendor_performance'] = df.apply(categorize_performance, axis=1)

In [11]:
df['vendor_performance'].value_counts()

vendor_performance
Medium    1236
Low        181
High        87
Name: count, dtype: int64

In [12]:
# Display the first few rows to confirm changes
print(df.shape)
df.head()

(1504, 62)


Unnamed: 0,product_title,ships_from_to,grams,quality,btc_price,cost_per_gram,cost_per_gram_pure,vendor_name,successful_transactions,rating,ships_from,ships_to,ships_to_US,ships_from_US,ships_to_NL,ships_from_NL,ships_to_FR,ships_from_FR,ships_to_GB,ships_from_GB,ships_to_CA,ships_from_CA,ships_to_DE,ships_from_DE,ships_to_AU,ships_from_AU,ships_to_EU,ships_from_EU,ships_to_ES,ships_from_ES,ships_to_N. America,ships_from_N. America,ships_to_BE,ships_from_BE,ships_to_WW,ships_from_WW,ships_to_SI,ships_from_SI,ships_to_IT,ships_from_IT,ships_to_DK,ships_from_DK,ships_to_S. America,ships_from_S. America,ships_to_CH,ships_from_CH,ships_to_BR,ships_from_BR,ships_to_CZ,ships_from_CZ,ships_to_SE,ships_from_SE,ships_to_CO,ships_from_CO,ships_to_CN,ships_from_CN,ships_to_PL,ships_from_PL,ships_to_GR,ships_from_GR,usd_price,vendor_performance
0,!!!!!INTRO OFFER!!!!! 1GR COCAINE 90%,NL → EU,1.0,90.0,0.02577,0.02577,0.028633,Mister-Molly,90,4.63,NL,EU,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,222596.44101,Low
1,!!!!!INTRO OFFER!!!!! 2GR COCAINE 90%,NL → EU,2.0,90.0,0.0515,0.02575,0.028611,Mister-Molly,90,4.63,NL,EU,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,444847.3695,Low
2,!!!INTRO!!! 0.5G COCAINE 89% - STRAIGHT FROM T...,NL → EU,0.5,89.0,0.01649,0.03298,0.037056,0ldamsterdamm,620,4.94,NL,EU,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,142437.53637,Medium
3,!1G! C O L O M B I A N C O C A I N E - 89% PURITY,FR → EU,1.0,89.0,0.0412,0.0412,0.046292,lhomme-masquer,15,5.0,FR,EU,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,355877.8956,Medium
4,** 1 Gram 87% Pure Uncut Colombian Cocaine **,NL → WW,1.0,87.0,0.034,0.034,0.03908,SMOOTHCRIMINAL007,28,4.78,NL,WW,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,293685.642,Low


In [13]:
# Determine the number of unique values in each column.
unique_values = df.nunique()
print(unique_values)

product_title         1504
ships_from_to           57
grams                   63
quality                 28
btc_price              815
                      ... 
ships_from_PL            1
ships_to_GR              1
ships_from_GR            1
usd_price              815
vendor_performance       3
Length: 62, dtype: int64


In [14]:
# Save the clean data
df.to_csv("cocaine_listing_clean.csv", index=False)