In [None]:

!pip install dash

In [None]:
import pandas as pd
import plotly.express as px

In [None]:
# Colab-only helper module; this import fails outside Google Colab.
from google.colab import drive

# Mount Google Drive under /content/drive so files stored there can be read
# with ordinary file paths by the cells that follow.
drive.mount('/content/drive')


In [None]:
# Location of the Nobel laureates CSV on the mounted Google Drive.
nobel_csv_path = '/content/drive/MyDrive/Colab_files/nobel_latest.csv'

# Load the whole file into a DataFrame used by every later cell.
data = pd.read_csv(nobel_csv_path)

# Quick sanity check: show the first five records.
print(data.head())


In [None]:
# Show every record whose award year is 2020.
print(data.loc[data['Year'].eq(2020)])

In [None]:
# Structural overview of the frame: columns, non-null counts and dtypes.
data.info()

In [None]:
# Summary statistics (by default pandas reports these for the numeric columns).
data.describe()

In [None]:
# Sub-frame holding only the columns whose dtype is `object`.
non_numeric_columns = data.select_dtypes(include=['object'])

# Print that sub-frame for inspection.
print(non_numeric_columns)


In [None]:
# Frequency table of each object-typed column (iterating a DataFrame yields
# its column labels).
for column_name in non_numeric_columns:
    print(data[column_name].value_counts())

In [None]:
# Distinct values of each object-typed column, in column order.
for _, column_values in data[non_numeric_columns.columns].items():
    print(column_values.unique())

In [None]:
# Number of distinct (non-null) values per object-typed column, computed in a
# single vectorised call and then printed one column per line.
distinct_counts = data[non_numeric_columns.columns].nunique()
for column_name, n_distinct in distinct_counts.items():
    print(column_name, n_distinct)

# Interactive Map for Winners by Country

In [None]:
# Count laureates per birth country and shape the result as a two-column
# frame: 'Country' (the birth country) and 'Count' (number of laureates).
winners_by_country = (
    data['Birth_Country']
    .value_counts()
    .rename_axis('Country')
    .reset_index(name='Count')
)
print(winners_by_country)

In [None]:
import plotly.express as px

# World choropleth: each country is coloured by its number of laureates.
# Countries are matched by their English names, as stored in 'Country'.
choropleth_options = dict(
    locations='Country',
    locationmode='country names',
    color='Count',
    hover_name='Country',
    projection='natural earth',
    color_continuous_scale='Spectral',
)
fig = px.choropleth(winners_by_country, **choropleth_options)

# Give the figure a title.
fig.update_layout(title='Nobel Prize Winners by Country')

# Render the interactive map in the notebook output.
fig.show()


In [None]:
# Number of prizes for every (birth country, category) pair, as a flat frame
# with columns 'Birth_Country', 'Category' and 'Count'.
pair_sizes = data.groupby(['Birth_Country', 'Category']).size()
prizes_by_country_category = pair_sizes.reset_index(name='Count')
prizes_by_country_category


Interactive graph for the average age of winners each year

In [None]:
# Parse birth dates; values that cannot be parsed become NaT instead of raising.
data['Birth_Date'] = pd.to_datetime(data['Birth_Date'], errors='coerce')

# Keep only rows with a usable birth date. The explicit .copy() makes the
# result an independent frame, so adding the 'Age' column below cannot trigger
# pandas' SettingWithCopyWarning.
# NOTE: this rebinds `data`, so every later cell that reads `data` sees the
# filtered frame rather than the full file.
data = data.dropna(subset=['Birth_Date']).copy()

# Age at the time of the award, as a calendar-year difference (month and day
# of birth are ignored).
data['Age'] = data['Year'] - data['Birth_Date'].dt.year
print(data.head())

# Series.max() skips NaN and does not raise on an empty column, unlike the
# builtin max() applied to a Series.
print(data['Age'].max())

# Mean laureate age for each award year, as a two-column frame (Year, Age).
average_age = data.groupby('Year')['Age'].mean().reset_index()
print(average_age.head())

# Line chart of the yearly mean laureate age.
fig = px.line(
    average_age,
    x='Year',
    y='Age',
    title='Average Age of Nobel Prize Winners over the Years',
)

# Axis titles, white plot background, and hover mode 'x'.
layout_settings = {
    'xaxis_title': 'Year',
    'yaxis_title': 'Average Age',
    'plot_bgcolor': 'white',
    'hovermode': 'x',
}
fig.update_layout(**layout_settings)

# Render the interactive chart in the notebook output.
fig.show()


Interactive graph for the female-to-male ratio

In [None]:
# Laureates per (year, gender), pivoted so each gender becomes a column;
# year/gender combinations that never occur are filled with 0.
laureates_per_year_gender = data.groupby(['Year', 'Gender'])['Laureate_Id'].count()
gender_count = laureates_per_year_gender.unstack().fillna(0)

# Female-to-male ratio for every year (a year with zero male laureates yields
# inf, or NaN when both counts are zero).
gender_ratio = gender_count['female'].div(gender_count['male'])

# Line chart of the ratio over time.
fig = px.line(
    x=gender_ratio.index,
    y=gender_ratio.values,
    title='Ratio of Female to Male Nobel Prize Winners over the Years',
)

# Axis titles, white plot background, and hover mode 'x'.
layout_settings = {
    'xaxis_title': 'Year',
    'yaxis_title': 'Female-to-Male Ratio',
    'plot_bgcolor': 'white',
    'hovermode': 'x',
}
fig.update_layout(**layout_settings)

# Render the interactive chart in the notebook output.
fig.show()
