In [1]:
import pandas as pd

# Sample data: ten students, their GPA and whether they play a sport.
# Participation simply alternates Yes/No down the rows.
data = {
    'GPA': [3.5, 3.8, 2.9, 3.0, 3.7, 3.2, 3.9, 2.7, 3.3, 3.6],
    'sports': ['Yes', 'No'] * 5
}

# Build the frame and attach a numeric 1/0 version of the Yes/No column so it
# can take part in a Pearson correlation.
yes_no_codes = {'Yes': 1, 'No': 0}
df = pd.DataFrame(data).assign(
    sports_encoded=lambda frame: frame['sports'].map(yes_no_codes)
)

# Pearson correlation between GPA and the binary sports indicator
gpa_scores = df['GPA']
sports_flags = df['sports_encoded']
correlation = gpa_scores.corr(sports_flags)

print(f"Correlation between GPA and sports participation: {correlation}")


Correlation between GPA and sports participation: 0.2595870658255824


In [2]:
import pandas as pd
import plotly.express as px

# Sample data: GPA plus Yes/No flags for four extracurricular activities.
# Sports/drama alternate Yes/No; music/volunteering alternate No/Yes.
data = {
    'GPA': [3.5, 3.8, 2.9, 3.0, 3.7, 3.2, 3.9, 2.7, 3.3, 3.6],
    'sports': ['Yes', 'No'] * 5,
    'music': ['No', 'Yes'] * 5,
    'drama': ['Yes', 'No'] * 5,
    'volunteering': ['No', 'Yes'] * 5
}
df = pd.DataFrame(data)

# Add a 1/0 "<activity>_encoded" column for every Yes/No activity column
yes_no_codes = {'Yes': 1, 'No': 0}
activity_names = ['sports', 'music', 'drama', 'volunteering']
for activity_name in activity_names:
    df[f'{activity_name}_encoded'] = df[activity_name].map(yes_no_codes)

# Pairwise correlations between GPA and the encoded activity columns
numeric_columns = ['GPA'] + [f'{activity_name}_encoded' for activity_name in activity_names]
correlation_matrix = df[numeric_columns].corr()

# Swap the "*_encoded" labels for display names on both axes of the matrix
display_labels = ['GPA', 'Sports', 'Music', 'Drama', 'Volunteering']
correlation_matrix.columns = list(display_labels)
correlation_matrix.index = list(display_labels)

# Render the matrix as an annotated heatmap with Plotly Express
heatmap_options = dict(
    text_auto=True,
    color_continuous_scale='RdBu',
    title="Correlation Heatmap: GPA and Extracurricular Activities",
)
fig = px.imshow(correlation_matrix, **heatmap_options)

# Display the figure
fig.show()


In [3]:
import plotly.graph_objects as go
import numpy as np

# Simulate 1,000 GPAs from a normal distribution (mean 3.0, std 0.5).
# The global NumPy seed is fixed first so the sample is the same on every run.
np.random.seed(0)
gpa_data = np.random.normal(loc=3.0, scale=0.5, size=1000)

# Histogram of the simulated GPAs (nbinsx caps the number of bins at 30)
gpa_histogram = go.Histogram(x=gpa_data, nbinsx=30)
fig = go.Figure(data=[gpa_histogram])
fig.update_layout(
    title='Distribution of GPAs',
    xaxis_title='GPA',
    yaxis_title='Frequency',
)
fig.show()

In [4]:
import pandas as pd
import plotly.graph_objects as go

# Example data: one row per student, one 1/0 flag per activity
# (1 = participates, 0 = does not)
data = {
    'extracurricular': [1, 0, 1, 1, 0],
    'sports': [1, 1, 0, 1, 1],
    'music': [0, 1, 1, 0, 1],
    'volunteering': [1, 1, 1, 0, 0]
}
df = pd.DataFrame(data)

# Per-activity tallies: summing the flags counts participants,
# counting the zeros gives the non-participants.
activity_columns = ['extracurricular', 'sports', 'music', 'volunteering']
activity_flags = df[activity_columns]
participation_counts = activity_flags.sum()
non_participation_counts = activity_flags.eq(0).sum()

# Tidy table with one row per activity, ready for plotting
counts_df = pd.DataFrame({
    'Activity': participation_counts.index,
    'Participation': participation_counts.values,
    'Non-Participation': non_participation_counts.values
})

# One bar trace per status; the trace name doubles as the counts_df column
fig = go.Figure()
for status in ('Participation', 'Non-Participation'):
    fig.add_trace(go.Bar(
        name=status,
        x=counts_df['Activity'],
        y=counts_df[status]
    ))

# Titles, axis labels, and stacked bars so each column totals the class size
layout_options = dict(
    title='Number of Students Participating and Not Participating in Activities',
    xaxis_title='Activities',
    yaxis_title='Number of Students',
    barmode='stack',
)
fig.update_layout(**layout_options)

# Display the figure
fig.show()


In [5]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np

# Fix the global NumPy seed so the random cohort is identical on every run
np.random.seed(42)

# Simulate 100 students with an independent random 0/1 flag per activity.
# The columns are drawn in list order, one randint call each.
activity_columns = ['extracurricular', 'sports', 'music', 'volunteering']
data = {
    column: np.random.randint(0, 2, size=100)
    for column in activity_columns
}
df = pd.DataFrame(data)

# Derive the per-student activity total and the boolean status flags from it
activity_total = df[activity_columns].sum(axis=1)
df['total_activities'] = activity_total
df['participating_in_at_least_one'] = activity_total.gt(0)
df['participating_in_more_than_one'] = activity_total.gt(1)
df['participating_in_all'] = activity_total.eq(4)
df['not_participating_at_all'] = activity_total.eq(0)

# Chart label -> flag column, in the left-to-right order the bars are drawn.
# Note the categories overlap: "all" is a subset of "more than one", which is
# a subset of "at least one".
status_flags = {
    'Participating in All Activities': 'participating_in_all',
    'Participating in More Than One Activity': 'participating_in_more_than_one',
    'Participating in At Least One Activity': 'participating_in_at_least_one',
    'Not Participating at All': 'not_participating_at_all',
}
categories = list(status_flags)

# Summing a boolean column counts the students for whom the flag is True
counts = [df[flag_column].sum() for flag_column in status_flags.values()]
all_activities_count, more_than_one_count, participating_count, not_participating_count = counts

# Single bar trace with a distinct colour per category
bar_colors = ['#FF5733', '#AB802D', '#1E303C', '#D9BC61']
status_bars = go.Bar(x=categories, y=counts, marker_color=bar_colors)
fig = go.Figure(data=[status_bars])

# Title and axis labels
fig.update_layout(
    title='Comparison of Students by Participation in Activities',
    xaxis_title='Student Participation Status',
    yaxis_title='Number of Students',
)

# Display the figure
fig.show()

In [9]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np

# Example DataFrame with GPA.
# Seed NumPy's global generator first so this cell produces the same data on
# every Restart & Run All instead of depending on earlier kernel state.
np.random.seed(42)

# Columns that hold a 0/1 participation flag. Keeping them in one list lets
# the "all activities" check below count exactly these columns.
activities = ['extracurricular', 'sports', 'music', 'volunteering']

data = {activity: np.random.randint(0, 2, size=100) for activity in activities}
data['GPA'] = np.round(np.random.uniform(2.0, 4.0, size=100), 2)
df = pd.DataFrame(data)

# Determine participation status
df['total_activities'] = df[activities].sum(axis=1)
# Compare with the number of activity columns. len(df.columns) also counts
# 'GPA' and 'total_activities', so that comparison could never be True and the
# "all activities" group was always empty.
df['participating_in_all'] = df['total_activities'] == len(activities)
df['participating_in_more_than_one'] = df['total_activities'] > 1
df['participating_in_at_least_one'] = df['total_activities'] > 0
df['not_participating_at_all'] = df['total_activities'] == 0

# Count the number of students in each category (sum of a boolean column)
all_activities_count = df['participating_in_all'].sum()
more_than_one_count = df['participating_in_more_than_one'].sum()
at_least_one_count = df['participating_in_at_least_one'].sum()
not_participating_count = df['not_participating_at_all'].sum()

# Calculate average GPA for each category.
# A category with no students yields NaN here (mean of an empty selection).
average_gpa = {
    'Participating in All Activities': df.loc[df['participating_in_all'], 'GPA'].mean(),
    'Participating in More Than One Activity': df.loc[df['participating_in_more_than_one'], 'GPA'].mean(),
    'Participating in At Least One Activity': df.loc[df['participating_in_at_least_one'], 'GPA'].mean(),
    'Not Participating at All': df.loc[df['not_participating_at_all'], 'GPA'].mean()
}

# Prepare data for plotting (counts are listed in the same order as categories)
categories = list(average_gpa.keys())
counts = [all_activities_count, more_than_one_count, at_least_one_count, not_participating_count]
avg_gpa = [average_gpa[category] for category in categories]

# Create bar plot for counts (drawn against the primary, left-hand y-axis)
fig = go.Figure()

fig.add_trace(go.Bar(
    x=categories,
    y=counts,
    name='Number of Students',
    marker_color='#1E303C'
))

# Add line plot for average GPA.
# yaxis='y2' binds the trace to the secondary axis configured below; without
# it the GPA values would be drawn on the student-count scale.
fig.add_trace(go.Scatter(
    x=categories,
    y=avg_gpa,
    mode='lines+markers',
    name='Average GPA',
    marker_color='#AB802D',
    yaxis='y2'
))

# Update layout.
# Axis title colours are set through title.font; the older `titlefont`
# shorthand is deprecated and rejected by recent Plotly releases.
fig.update_layout(
    title='Comparison of Students by Participation in Activities and Average GPA',
    xaxis_title='Student Participation Status',
    yaxis=dict(
        title=dict(text='Number of Students', font=dict(color='#1E303C')),
        tickfont=dict(color='#1E303C')
    ),
    yaxis2=dict(
        title=dict(text='Average GPA', font=dict(color='#AB802D')),
        tickfont=dict(color='#AB802D'),
        overlaying='y',
        side='right'
    )
)

# Show plot
fig.show()


In [7]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

# Fix the global NumPy seed so the synthetic cohort is identical on every run
np.random.seed(42)

# Simulate the cohort: a random 0/1 flag per activity (drawn in list order),
# then a uniform GPA in [2.0, 4.0) rounded to two decimals.
num_students = 100
activities = ['extracurricular', 'sports', 'music', 'volunteering']
data = {
    activity: np.random.randint(0, 2, num_students)
    for activity in activities
}
data['GPA'] = np.round(np.random.uniform(2.0, 4.0, num_students), 2)

df = pd.DataFrame(data)

# For every activity, mean GPA of the students who take part (flag == 1)
# versus those who do not (flag == 0)
average_gpa_per_activity = {
    activity: {
        'Participating': df.loc[df[activity] == 1, 'GPA'].mean(),
        'Not Participating': df.loc[df[activity] == 0, 'GPA'].mean()
    }
    for activity in activities
}

# One bar trace per activity; each trace has a bar for each participation status
categories = ['Participating', 'Not Participating']
fig = go.Figure()

for activity, gpa_values in average_gpa_per_activity.items():
    status_means = [gpa_values[status] for status in categories]
    activity_bars = go.Bar(
        x=categories,
        y=status_means,
        name=f'GPA for {activity.capitalize()}',
        marker=dict(line=dict(width=1)),
        # The activity name is passed as text so the hover template can show it
        text=f'{activity.capitalize()}',
        hovertemplate='%{text}<br>Category: %{x}<br>GPA: %{y}<extra></extra>'
    )
    fig.add_trace(activity_bars)

# Titles, grouped (side-by-side) bars, and hover that follows the x position
layout_options = dict(
    title='Average GPA by Activity Participation',
    xaxis_title='Participation Status',
    yaxis_title='Average GPA',
    barmode='group',
    hovermode='x',
)
fig.update_layout(**layout_options)

# Display the figure
fig.show()
