In [129]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go 
import plotly.express as px
import seaborn as sns 
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter

In [2]:
# Load the Nobel laureates table; the location is named so it is easy to
# spot and change when the notebook runs outside its original environment.
NOBEL_CSV_PATH = "../input/nobel-prize-19012016/nobel.csv"
df = pd.read_csv(NOBEL_CSV_PATH)

In [79]:
# Peek at the first five rows to see which columns are available.
df.head(n=5)

In [77]:
# Size of the table, shown as (rows, columns).
n_rows, n_cols = df.shape
(n_rows, n_cols)

In [75]:
# How many rows carry each distinct prize_share value.
df["prize_share"].value_counts()

In [73]:
# Total number of prizes: every row contributes its share of one prize, so
# summing the shares over all rows gives the prize count.
# The tallies are read from the data rather than typed in by hand, so the
# figure stays correct if the CSV changes.
# NOTE(review): assumes prize_share holds strings of the form "n/d"
# (e.g. "1/2") - confirm against the value_counts output.
share_counts = df["prize_share"].value_counts()
x = 0.0
for share, rows in share_counts.items():
    numerator, denominator = share.split("/")
    # Multiply before dividing so each term is computed as rows*n/d in a
    # single division instead of accumulating many rounded fractions.
    x += int(rows) * int(numerator) / int(denominator)
x

In [130]:
# Two stacked "big number" cards: the row count of the table on top and the
# prize total (x) underneath.
fig = go.Figure()
indicator_cards = [
    ("number of laureates", int(df.shape[0])),
    ("number of Nobel prizes", int(x)),
]
for grid_row, (card_title, card_value) in enumerate(indicator_cards):
    fig.add_trace(
        go.Indicator(
            mode="number",
            value=card_value,
            title={"text": card_title},
            domain={"row": grid_row, "column": 0},
        )
    )
# Last expression of the cell: update_layout's return value is what the
# notebook displays.
fig.update_layout(grid={"rows": 2, "columns": 1, "pattern": "independent"})

In [88]:
# Ten most frequent birth countries, as a two-column frame for plotting.
top_birth_countries = df["birth_country"].value_counts().head(10)
con_df = top_birth_countries.rename_axis("birth_country").reset_index(name="count")
fig = px.bar(
    con_df,
    x="birth_country",
    y="count",
    text="count",
    title="Top 10 countries",
)
fig.show()

In [95]:
# Flag rows whose birth country is the USA, bucket award years into decades,
# then take the mean of the flag per decade (i.e. the USA-born proportion).
df["usa_born_winner"] = df["birth_country"].eq("United States of America")
df["decade"] = np.floor(df["year"] / 10).mul(10).astype(int)
rows_by_decade = df.groupby("decade", as_index=False)
usa = rows_by_decade["usa_born_winner"].mean()
display(usa)

In [102]:
# Line chart of the per-decade proportion of USA-born rows.
sns.set_theme()
fig_dims = (20, 10)
fig, ax = plt.subplots(figsize=fig_dims)
# Draw on the axes created above explicitly instead of relying on the
# implicit "current axes".
sns.lineplot(data=usa, x="decade", y="usa_born_winner", marker="o", ax=ax)
# The y values are fractions in [0, 1]; show them as percentages.
ax.yaxis.set_major_formatter(PercentFormatter(1.0))
ax.set(
    title="Share of USA-born laureates by decade",
    xlabel="Decade",
    ylabel="USA-born share",
)
# plt.show() rather than fig.show(): Figure.show() needs a GUI backend and
# only emits a warning under the notebook's non-interactive backend.
plt.show()

In [104]:
# Row count per value of the sex column, drawn as a pie chart.
pie_df = df["sex"].value_counts().rename_axis("sex").reset_index(name="count")
gender_colours = ["#ff0000", "#6600ff"]
fig = px.pie(
    pie_df,
    names="sex",
    values="count",
    title="proportion of genders",
    color_discrete_sequence=gender_colours,
)
fig.show()

In [107]:
# Boolean flag for rows recorded as "Female", then its mean per decade
# (the proportion of such rows in each decade).
df["female"] = df["sex"].eq("Female")
rows_by_decade = df.groupby("decade", as_index=False)
fem_winner = rows_by_decade["female"].mean()
display(fem_winner)

In [108]:
# Line chart of the per-decade proportion of rows flagged as female.
sns.set_theme()
fig_dims = (20, 10)
fig, ax = plt.subplots(figsize=fig_dims)
# Draw on the axes created above explicitly instead of relying on the
# implicit "current axes".
sns.lineplot(
    data=fem_winner,
    x="decade",
    y="female",
    marker="o",
    color="#6600ff",
    ax=ax,
)
# The y values are fractions in [0, 1]; show them as percentages.
ax.yaxis.set_major_formatter(PercentFormatter(1.0))
ax.set(
    title="Share of female laureates by decade",
    xlabel="Decade",
    ylabel="Female share",
)
# plt.show() rather than fig.show(): Figure.show() needs a GUI backend and
# only emits a warning under the notebook's non-interactive backend.
plt.show()

In [109]:
# Prize categories among the rows flagged as female, most frequent first.
female_categories = df.loc[df["female"], "category"]
fem_df = (
    female_categories.value_counts()
    .rename_axis("category")
    .reset_index(name="count")
)
fig = px.bar(
    fem_df,
    x="category",
    y="count",
    text="count",
    title="Top category for females",
    color_discrete_sequence=["#6600ff"],
)
fig.show()

In [110]:
# Earliest-year row among those flagged as female.
female_rows = df.loc[df["female"]]
f = female_rows.nsmallest(n=1, columns="year")
display(f.loc[:, ["full_name", "prize", "category", "year"]])

In [118]:
# Rows whose full_name occurs more than once in the table.
# The size of each name's group is broadcast back onto the rows and used as a
# boolean mask, which keeps the rows in their original order.
rows_per_name = df.groupby("full_name")["full_name"].transform("size")
repeat = df[rows_per_name > 1]
display(repeat[["full_name", "category", "prize", "year"]])

In [119]:
# Number of rows per repeated name, drawn as a bar chart.
rep_df = (
    repeat["full_name"]
    .value_counts()
    .rename_axis("full_name")
    .reset_index(name="count")
)
fig = px.bar(
    rep_df,
    x="full_name",
    y="count",
    text="count",
    title="Repeated laureates",
)
fig.show()

In [121]:
# Parse birth dates, then derive the age at award time as the award year
# minus the birth year (calendar-year difference, not exact age).
parsed_birth_dates = pd.to_datetime(df["birth_date"])
df["birth_date"] = parsed_birth_dates
df["age"] = df["year"].sub(parsed_birth_dates.dt.year)
df.info()

In [122]:
def GetGrade(age):
    """Bucket an age in years into a coarse life-stage label.

    Parameters
    ----------
    age : number or missing value
        Age in years. May be missing (NaN/None).

    Returns
    -------
    str or float
        "Child" for age <= 14, "Youth" for 14 < age <= 25,
        "Adult" for 25 < age <= 64, "Senior" for age > 64,
        and NaN when the age is missing.
    """
    # NaN compares False against every bound, so without this guard a missing
    # age would fall through to the last branch and be labelled "Senior".
    if pd.isna(age):
        return np.nan
    if age <= 14:
        return "Child"
    if age <= 25:
        return "Youth"
    if age <= 64:
        return "Adult"
    return "Senior"
    
# Label every row with its age group. The label depends only on the age
# column, so map over that column instead of building a full row object per
# record with DataFrame.apply(axis=1).
df['age_group'] = df['age'].map(GetGrade)

In [126]:
# Row counts per prize category, split into one bar per age group.
plt.figure(figsize=(15, 9))
ax = sns.countplot(
    data=df,
    x="category",
    hue="age_group",
    palette="Set1",
)
# Borderless legend in the top-right corner.
ax.legend(frameon=False, loc="upper right")

In [127]:
# The single row with the smallest age value.
youngest_row = df.nsmallest(n=1, columns="age")
display(youngest_row)

In [128]:
# The single row with the largest age value.
oldest_row = df.nlargest(n=1, columns="age")
display(oldest_row)