In [6]:
from pathlib import Path

import numpy as np
import pandas as pd

# Set the number of rows
n = 10000

# Create the dataset with BP, LP, and BIKE columns
np.random.seed(42)
df_new = pd.DataFrame({
    'BP': np.random.randint(200, 301, size=n),  # BP values between 200 and 300 inclusive
    'LP': np.random.randint(400, 576, size=n),  # LP values between 400 and 575 inclusive
    'BIKE': np.random.randint(9, 12, size=n)    # BIKE values between 9 and 11 inclusive
})

# Function to assign group based on conditions
def assign_group_new(row):
    if row['BP'] >= 270 and row['LP'] >= 550 and row['BIKE'] >= 9:
        return 6
    elif row['BP'] >= 225 and row['LP'] >= 475 and row['BIKE'] >= 11:
        return 7
    elif row['BP'] >= 245 and row['LP'] >= 500 and row['BIKE'] >= 10:
        return 4
    elif row['BP'] >= 230 and row['LP'] >= 500 and row['BIKE'] >= 10:
        return 3
    elif row['BP'] >= 220 and row['LP'] >= 470 and row['BIKE'] >= 10:
        return 2
    elif row['BP'] >= 200 and row['LP'] >= 400 and row['BIKE'] >= 11:
        return 5
    elif row['BP'] >= 200 and row['LP'] >= 400 and row['BIKE'] >= 9:
        return 1
    else:
        return None  # No group assigned if conditions not met

# Assign groups to the dataset
df_new['Group'] = df_new.apply(assign_group_new, axis=1)

# Probability distributions for each group
group_probabilities_new = {
    1: [0.1, 0.3, 0.6],
    2: [0.2, 0.35, 0.45],
    3: [0.35, 0.3, 0.35],
    4: [0.4, 0.3, 0.3],
    5: [0.3, 0.6, 0.1],
    6: [0.3, 0.3, 0.4],
    7: [0.4, 0.45, 0.15]
}

# Function to assign grade based on group and probabilities
def assign_grade_new(group):
    if pd.isna(group):
        return None
    return np.random.choice(['A', 'B', 'C'], p=group_probabilities_new[group])

# Assign grades to the dataset
df_new['Grade'] = df_new['Group'].apply(assign_grade_new)

# Add 'Offense' and 'Defense' columns based on 'Grade'
def assign_offense_defense(grade):
    if grade == 'A':
        offense = np.random.randint(85, 101)
        defense = np.random.randint(90, 101)
    elif grade == 'B':
        offense = np.random.randint(85, 96)
        defense = np.random.randint(80, 100)
    elif grade == 'C':
        offense = np.random.randint(70, 86)
        defense = np.random.randint(65, 86)
    else:
        offense, defense = None, None
    return offense, defense

# Apply the offense and defense assignment function to the dataset
df_new[['Offense', 'Defense']] = df_new['Grade'].apply(assign_offense_defense).apply(pd.Series)

# Remove the 'Group' column, leaving only BP, LP, BIKE, Grade, Offense, and Defense
df_new = df_new.drop(columns=['Group'])

# Display the first 3 rows
print("First 3 Rows of the Dataset:")
print(df_new.head(3))

# Calculate percentage of each grade in the dataset
grade_counts = df_new['Grade'].value_counts(normalize=True) * 100
print("\nPercentage of Grades in the Dataset:")
print(grade_counts)


First 3 Rows of the Dataset:
    BP   LP  BIKE Grade  Offense  Defense
0  251  456     9     C       83       68
1  292  445    11     A       89      100
2  214  535    10     A       94       96

Percentage of Grades in the Dataset:
Grade
C    41.43
B    37.36
A    21.21
Name: proportion, dtype: float64


In [7]:
# Calculate the mean for BP, LP, BIKE, Offense, and Defense
mean_values = df_new[['BP', 'LP', 'BIKE', 'Offense', 'Defense']].mean()

# Display the mean values
print("Mean Values for BP, LP, BIKE, Offense, and Defense:")
print(mean_values)


Mean Values for BP, LP, BIKE, Offense, and Defense:
BP         249.8988
LP         486.5342
BIKE        10.0091
Offense     85.3519
Defense     84.6559
dtype: float64


In [8]:
# Define the file path for saving the CSV file (update the path for your system)
file_path = '/Users/steventuschman/Desktop/training_dataset.csv'

# Save the DataFrame to a CSV file
df_new.to_csv(file_path, index=False)

print(f"Dataset has been saved to: {file_path}")


Dataset has been saved to: /Users/steventuschman/Desktop/training_dataset.csv
