In [8]:
import numpy as np
import pandas as pd

# Define the size of the dataset
num_rows = 100

tasks = ["Demo", "Discussion", "Follow Up", "Review", "Planning"]
salespersons = ["Noel Thomas", "Alex Kim", "Maria Garcia", "John Doe", "Bruce Wayne"]

np.random.seed(0)  # For reproducibility
salesperson_data = np.random.choice(salespersons, size=num_rows)
task_data = np.random.choice(tasks, size=num_rows)

# Generating 'Priority' (1-5) and 'Opportunity Size' (random float between 1000 and 5000)
priority_data = np.random.randint(1, 6, size=num_rows)  # Integer values between 1 and 5
opportunity_size_data = np.random.uniform(1000, 5000, size=num_rows)  # Float values

# Generating 'Converted' (boolean)
converted_data = np.random.choice([True, False], size=num_rows)

# Creating DataFrame
synthetic_df = pd.DataFrame({
    'Salesperson': salesperson_data,
    'Task': task_data,
    'Priority': priority_data,
    'Opportunity Size': opportunity_size_data,
    'Converted': converted_data
})

synthetic_df.head()  # Display the first few rows of the dataset

Unnamed: 0,Salesperson,Task,Priority,Opportunity Size,Converted
0,Bruce Wayne,Follow Up,2,1853.247909,True
1,Noel Thomas,Review,4,3072.802856,False
2,John Doe,Follow Up,5,1102.650872,True
3,John Doe,Discussion,4,1829.880302,True
4,John Doe,Follow Up,4,2698.741875,False


In [9]:
from sklearn.datasets import make_regression

# Generate two additional numerical features
num_additional_features = 1
closingProb, _ = make_regression(n_samples=num_rows, n_features=num_additional_features, noise=0.1)

synthetic_df["Closing Probability"] = closingProb

synthetic_df.head()


Unnamed: 0,Salesperson,Task,Priority,Opportunity Size,Converted,Closing Probability
0,Bruce Wayne,Follow Up,2,1853.247909,True,-0.78881
1,Noel Thomas,Review,4,3072.802856,False,-1.610243
2,John Doe,Follow Up,5,1102.650872,True,0.217526
3,John Doe,Discussion,4,1829.880302,True,1.211364
4,John Doe,Follow Up,4,2698.741875,False,-0.47084


In [10]:
from sklearn.preprocessing import MinMaxScaler

# Initialize the MinMaxScaler
scaler = MinMaxScaler()

# Apply MinMax scaling to the 'Feature_1' and 'Feature_2'
scaled_features = scaler.fit_transform(synthetic_df[["Closing Probability"]])

synthetic_df["Closing Probability"] = scaled_features

synthetic_df.head()

Unnamed: 0,Salesperson,Task,Priority,Opportunity Size,Converted,Closing Probability
0,Bruce Wayne,Follow Up,2,1853.247909,True,0.341904
1,Noel Thomas,Review,4,3072.802856,False,0.153761
2,John Doe,Follow Up,5,1102.650872,True,0.572397
3,John Doe,Discussion,4,1829.880302,True,0.800028
4,John Doe,Follow Up,4,2698.741875,False,0.414732
