In [1]:
import os
from random import randint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


# Create a variable for data file name
output_filename = "../data/jadavpur_students.csv"

# Initialize empty list
data = []

# Number of data samples
N = 10000

# Create normal distribution of male weights
mu = 65 # mean
sigma = 6 # standard deviation
male_weights = np.random.normal(mu, sigma, N)


# Create normal distribution of female weights
mu = 55 # mean
sigma = 6 # standard deviation
female_weights = np.random.normal(mu, sigma, N)

# Iterate from 0 to N-1
for i in range(N):

    # Create a dummy person, 
    person = {
        "id": i+1,
        "weight": 0,
        "gender": "F"
    }

    # Alternate gender between male and female
    person["gender"] = "M" if i % 2 == 0 else "F"
    person["weight"] = male_weights[randint(0, N-1)] if i % 2 == 0 else female_weights[randint(0, N-1)]

    # Append to list
    data.append(person)

# -----------------------------------------------

# Create data frame
df = pd.DataFrame(data)

# Write data to a .csv file
df.to_csv(output_filename, index=False)


In [2]:
from ipywidgets import interact

def interactive_plot(sample_size, bins):

    df = pd.read_csv(output_filename)

    df = df.sample(sample_size)
    df.shape
    
    # For weights
    males = df.loc[(df["gender"] == "M")]
    females = df.loc[(df["gender"] == "F")]

    figure, ax = plt.subplots(2, 1)

    style = {'edgecolor': 'C3', 'linewidth': 0.5}

    plt.rcParams["figure.autolayout"] = True
    plt.rcParams["figure.figsize"] = [9, 7]


    plt.xlabel('Weight (kilos)')
    plt.title("")

    df.hist("weight", label="All", ax=ax[0], bins=bins, **style)
    males.hist("weight", label="M", ax=ax[1], bins=bins, **style)
    females.hist("weight", label="F", ax=ax[1], bins=bins, **style)

    plt.legend(loc='upper right')

    # Combine all the operations and display
    plt.show()

In [3]:
interact(interactive_plot, sample_size = (10, 10000, 1), bins=(3, 20, 1))

interactive(children=(IntSlider(value=5005, description='sample_size', max=10000, min=10), IntSlider(value=11,…

<function __main__.interactive_plot(sample_size, bins)>