In [None]:
import requests
from bs4 import BeautifulSoup
import wikipedia

import pandas as pd
from IPython.display import display, clear_output

def convert_encoding(element):
    """Repair a string that holds UTF-8 bytes mis-decoded as ISO-8859-1.

    The string is re-encoded as ISO-8859-1 and the resulting bytes are decoded
    as UTF-8 (e.g. ``'Ã©'`` becomes ``'é'``).

    Args:
        element: Any value; only ``str`` instances are processed.

    Returns:
        The repaired string when the round trip succeeds. The original value
        is returned unchanged when it is not a string, or when the round trip
        is impossible (characters outside ISO-8859-1, or bytes that are not
        valid UTF-8), which is the case for text that was decoded correctly in
        the first place.
    """
    if not isinstance(element, str):
        return element
    try:
        return element.encode('ISO-8859-1').decode('UTF-8')
    except UnicodeError:
        # Covers both UnicodeEncodeError and UnicodeDecodeError: the text is
        # not Latin-1 mojibake, so leave it as it is instead of crashing.
        return element

def scrape_data(url, timeout=30):
    """Download a page and return the cell text of its first ``wikitable``.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with one entry per ``<tr>`` of the table, each entry being the
        list of stripped ``<td>`` texts of that row. Rows that contain no
        ``<td>`` cells contribute an empty list.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page has no ``<table class="wikitable">``.
    """
    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'class': 'wikitable'})
    if table is None:
        raise ValueError(f"No table with class 'wikitable' found at {url}")

    return [
        [cell.text.strip() for cell in row.find_all('td')]
        for row in table.find_all('tr')
    ]


def clean_dataframe(df, csv_path='nobel.csv'):
    """Load the Nobel CSV, fill its gaps, write it back and return it.

    Rows in which every column is missing are dropped; every remaining
    missing value is replaced by the text ``'No award'``. The result is
    written back to ``csv_path`` (without the index) and returned.

    Earlier revisions also stripped whitespace, re-encoded the text and
    displayed the full frame, but that intermediate result was discarded by a
    second ``read_csv`` before returning. Only the steps that actually
    determine the returned frame are kept.

    Args:
        df: Accepted for backward compatibility and not used; the data always
            comes from ``csv_path``.
        csv_path: CSV file to read and overwrite.

    Returns:
        The cleaned ``pandas.DataFrame``.
    """
    cleaned = (
        pd.read_csv(csv_path)
        .dropna(how='all')      # discard rows that are entirely empty
        .fillna('No award')     # a blank cell is presented as "No award"
    )

    cleaned.to_csv(csv_path, index=False)

    return cleaned

# Page that scrape_data() downloads and parses.
url = 'https://en.wikipedia.org/wiki/List_of_Nobel_laureates'

# Column labels applied to the scraped rows.
prize_columns = ['Year', 'Physics', 'Chemistry', 'Medicine', 'Literature', 'Peace', 'Economics']

# Scrape, wrap the rows in a DataFrame, then pass it through the cleaning step.
scraped_rows = scrape_data(url)
df = pd.DataFrame(scraped_rows, columns=prize_columns)
df = clean_dataframe(df)


def look_year_and_category(year, category, data=None):
    """Return the ';'-separated entries of one category for one year.

    Args:
        year: Value compared for equality against the ``'year'`` column.
        category: Name of the column to read.
        data: DataFrame to search. Defaults to the notebook-level ``df``.

    Returns:
        The cell of the first matching row split on ``';'``, or an empty list
        when no row has that year.
    """
    frame = df if data is None else data
    matches = frame.loc[frame['year'] == year, category].tolist()
    if not matches:
        # No row for that year: report "nothing" instead of an IndexError.
        return []
    return matches[0].split(';')


# Print the 'economics' entry of the 1998 row, split on ';'.
print(look_year_and_category(1998, 'economics'))




def find_column_with_name(df, name):
    """List the columns in which at least one cell contains ``name``.

    Every cell is compared through its string form, and ``name`` is matched
    as literal text (not as a regular expression), so names containing
    characters such as ``(`` or ``.`` are searched for exactly as written.

    Args:
        df: DataFrame to search.
        name: Text to look for inside the cells.

    Returns:
        A list of the matching column labels, in column order.
    """
    # astype(str) gives the same string view as applymap(str) without relying
    # on the deprecated applymap API.
    as_text = df.astype(str)
    mask = as_text.apply(lambda col: col.str.contains(name, regex=False, na=False))

    # Keep the columns where at least one cell matched.
    columns_with_name = df.columns[mask.any(axis=0)]

    return list(columns_with_name)

# Print the labels of every df column that contains 'Marie Curie' in some cell.
print(find_column_with_name(df, 'Marie Curie'))


wikipedia.set_lang("en")  # Query the English-language Wikipedia


# Search for pages
results = wikipedia.search("Nobel Prize")

# Defaults so that later cells still find these names when the fetch below
# fails and only an error message is printed.
content = ""
contentList = []

try:
    # Fetch the page once, inside the try, so lookup errors are handled here.
    page = wikipedia.page("List of Nobel laureates by country")

    # Get the whole page content
    content = page.content

    # Split on the "===" marker; later cells treat the odd-indexed pieces as
    # country names and the piece after each one as that country's text.
    contentList = content.split("===")

    # Specify the filename
    filename = "country.txt"

    # Writing the page content to the text file
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(content)

    print(f"Content saved to {filename}")

except wikipedia.exceptions.PageError:
    print("Page not found")
except wikipedia.exceptions.DisambiguationError as e:
    print("Disambiguation error. Possible options include:")
    print(e.options)




In [None]:
# Not read by the code visible in this notebook: the functions below take
# their own `country` argument.
country = "United States"
# Alias, not a copy: both names refer to the same list object, so in-place
# edits made through one name are visible through the other.
contentListCountry = contentList

def set_countries_to_list():
    """Strip surrounding whitespace from every odd-indexed entry, in place.

    Works directly on the notebook-level ``contentListCountry`` list; entries
    at even positions are left untouched.

    Returns:
        The same ``contentListCountry`` list object, after the update.
    """
    # Positions 1, 3, 5, ... are exactly the indices with i % 2 != 0.
    for position in range(1, len(contentListCountry), 2):
        contentListCountry[position] = contentListCountry[position].strip()

    return contentListCountry

# Strip the odd-indexed entries now. As the last expression of the cell, the
# returned list is also rendered as the cell's output.
set_countries_to_list()

In [None]:
def give_me_people_by_country(country, sections=None):
    """Return the list entry that immediately follows ``country``.

    Args:
        country: Entry to look for; it must equal a list element exactly.
        sections: List to search. Defaults to the notebook-level
            ``contentListCountry``.

    Returns:
        The element stored right after the first occurrence of ``country``,
        or ``None`` (after printing "Country not found") when ``country`` is
        not in the list.
    """
    # Search and read from one and the same list, rather than looking up in
    # one global name and reading from another that merely aliases it.
    pieces = contentListCountry if sections is None else sections

    try:
        found_index = pieces.index(country)
    except ValueError:
        print("Country not found")
        return None

    return pieces[found_index + 1]

   


In [None]:
# Fetch the entry stored right after "Canada" and print it as-is.
lista_de_personas = give_me_people_by_country("Canada")
print(lista_de_personas)



In [None]:
import pandas as pd



def filterable(data_string):
    """Parse "name, prize, year" lines into a DataFrame.

    Leading and trailing parentheses are removed from the text as a whole,
    the text is split into lines, and each line is split on commas. Only
    lines that yield exactly three comma-separated parts are kept; every part
    is stripped of surrounding whitespace.

    Args:
        data_string: Text to parse. ``None`` or an empty string yields an
            empty frame.

    Returns:
        A ``pandas.DataFrame`` with the string columns ``'Name'``,
        ``'Prize'`` and ``'Year'``, one row per accepted line.
    """
    columns = ['Name', 'Prize', 'Year']

    if not data_string:
        # Nothing to parse (e.g. the country lookup returned None).
        return pd.DataFrame([], columns=columns)

    records = []
    for line in data_string.strip("()").split('\n'):
        parts = [part.strip() for part in line.split(',')]
        if len(parts) == 3:  # Skip anything that is not a 3-field record
            records.append(parts)

    return pd.DataFrame(records, columns=columns)

In [None]:
def find_final(country, category):
    """Show the people listed for ``country`` whose prize equals ``category``.

    The text for the country is fetched with ``give_me_people_by_country``
    and parsed with ``filterable``. Matching rows are displayed with their
    ``'Name'`` and ``'Prize'`` columns; when none match, a "No awards"
    message is printed instead.

    Args:
        country: Country name, passed to ``give_me_people_by_country``.
        category: Prize label; compared by exact equality with the parsed
            ``'Prize'`` text.

    Returns:
        None. Results are shown as output only.
    """
    people_text = give_me_people_by_country(country)
    if people_text is None:
        # Unknown country: the lookup has already printed "Country not found",
        # and there is nothing to parse.
        return

    laureates = filterable(people_text)
    in_category = laureates[laureates['Prize'] == category]

    if in_category.empty:
        print(f'No awards for {country} in {category}')
    else:
        display(in_category[['Name', 'Prize']])

        
        

In [None]:
# Show the entries parsed for "Canada" whose prize is exactly "Economics".
find_final("Canada", "Economics")

In [None]:
# Show the entries parsed for "Mexico" whose prize is exactly "Economics".
find_final("Mexico", "Economics")

In [None]:
# Show the entries parsed for "Mexico" whose prize is exactly "Physics".
find_final("Mexico", "Physics")

In [None]:
# Show the entries parsed for "Mexico" whose prize is exactly "Chemistry".
find_final("Mexico", "Chemistry")

In [None]:
# Show the entries parsed for "United States" whose prize is exactly "Physics".
find_final("United States", "Physics")

In [None]:
import ipywidgets as widgets





def wiki_nobel():
    """Show a small widget UI for searching laureates by country and category.

    Two dropdowns (category and country) and a button are displayed. Each
    click clears the current output, shows the controls again and calls
    ``find_final`` with the selected country and category.

    Returns:
        None.
    """
    # NOTE(review): find_final compares the chosen label by exact equality
    # with the parsed 'Prize' text, so these labels must match the wording of
    # the source page - TODO confirm, in particular for 'Medicine'.
    category_dropdown = widgets.Dropdown(
        options=['Physics', 'Chemistry', 'Medicine', 'Literature', 'Peace', 'Economics'],
        description='Category:',
        disabled=False,
    )

    # set_countries_to_list() already strips the odd-indexed entries; those
    # entries are the ones offered as country choices.
    list_of_countries = set_countries_to_list()[1::2]

    country_dropdown = widgets.Dropdown(
        options=list_of_countries,
        description='Country:',
        disabled=False,
    )

    button = widgets.Button(description='Search Nobel!')

    controls = (category_dropdown, country_dropdown, button)
    display(*controls)

    def on_button_clicked(b):
        # wait=True postpones the clearing until the new output is ready,
        # which avoids a visible flicker between two searches.
        clear_output(wait=True)
        display(*controls)
        find_final(country_dropdown.value, category_dropdown.value)

    button.on_click(on_button_clicked)

In [None]:
# Build and show the interactive search controls.
wiki_nobel()