# In [None]:
import streamlit as st
import pickle
import numpy as np


def load_model():
    """Read the pickled artifacts from ``saved_steps.pkl`` and return them.

    The path is relative, so the file is resolved against the current
    working directory. Whatever object was pickled is returned unchanged.

    NOTE: unpickling can execute arbitrary code; only use a
    ``saved_steps.pkl`` that comes from a trusted source.
    """
    with open('saved_steps.pkl', 'rb') as handle:
        return pickle.load(handle)

# Load the pickled artifacts once, at import time.
data = load_model()

# Keep the "model" entry of the loaded object as the module-level regressor.
regressor = data["model"]
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt

def shorten_categories(categories, cutoff):
    """Map each category to itself, or to 'Other' when it is too rare.

    Args:
        categories: Category -> count container exposing ``.items()``, such
            as the Series returned by ``value_counts()`` or a plain
            ``dict`` / ``collections.Counter``.
        cutoff: Minimum count (inclusive) a category needs in order to keep
            its own label.

    Returns:
        dict mapping every category label either to itself (when its count
        is >= ``cutoff``) or to the string 'Other'.
    """
    # Iterating (label, count) pairs avoids positional indexing into
    # ``.values`` / ``.index`` and lets plain mappings be passed as well.
    return {
        label: label if count >= cutoff else 'Other'
        for label, count in categories.items()
    }


def clean_experience(x):
    """Convert a years-of-experience value to a number.

    The two textual buckets are translated to fixed values
    ('More than 50 years' -> 50, 'Less than 1 year' -> 0.5); any other
    input is passed to ``float()``, which raises if it is not numeric.
    """
    textual_buckets = (
        ('More than 50 years', 50),
        ('Less than 1 year', 0.5),
    )
    for label, years in textual_buckets:
        if x == label:
            return years
    return float(x)


def clean_title(x):
    """Collapse a free-text value into one of four coarse buckets.

    The rules are tried in order and the first matching substring wins:
    Bachelor’s degree, Master’s degree, then 'Post grad' for professional
    or other doctoral degrees. Anything else becomes
    'Less than a Bachelors'.
    """
    ordered_rules = (
        (('Bachelor’s degree',), 'Bachelor’s degree'),
        (('Master’s degree',), 'Master’s degree'),
        (('Professional degree', 'Other doctoral'), 'Post grad'),
    )
    for needles, bucket in ordered_rules:
        if any(needle in x for needle in needles):
            return bucket
    return 'Less than a Bachelors'


# NOTE(review): st.cache is deprecated in recent Streamlit releases in favour
# of st.cache_data -- confirm against the Streamlit version this app pins.
@st.cache
def load_data():
    """Load the salary CSV and return a cleaned DataFrame.

    Processing steps:
      1. Read ``Levels_Fyi_Salary_Data`` (path relative to the working
         directory) and keep only the Location, Title, YearsofExperience
         and BaseSalary columns.
      2. Drop every row with a missing value in any of those columns.
      3. Keep only locations with at least 400 rows and base salaries
         between 10,000 and 250,000 (both inclusive).
      4. Normalise YearsofExperience with ``clean_experience`` and Title
         with ``clean_title``.
      5. Rename BaseSalary to Salary.

    Returns:
        DataFrame with the columns Location, Title, YearsofExperience and
        Salary.
    """
    df = pd.read_csv("Levels_Fyi_Salary_Data")
    df = df[["Location", "Title", "YearsofExperience", "BaseSalary"]]
    # Only the four columns selected above exist from here on, so every
    # later step must use them; filtering on any other column raises
    # KeyError. dropna() already discards rows without a BaseSalary.
    df = df.dropna()

    location_map = shorten_categories(df["Location"].value_counts(), 400)
    df["Location"] = df["Location"].map(location_map)
    df = df[df["BaseSalary"] <= 250000]
    df = df[df["BaseSalary"] >= 10000]
    # .copy() so the column assignments below write to an independent frame
    # rather than to a filtered view of the previous one.
    df = df[df["Location"] != "Other"].copy()

    df["YearsofExperience"] = df["YearsofExperience"].apply(clean_experience)
    # NOTE(review): clean_title buckets values by degree wording; confirm
    # that this is the intended treatment for the Title column.
    df["Title"] = df["Title"].apply(clean_title)
    df = df.rename({"BaseSalary": "Salary"}, axis=1)
    return df

# Build the cleaned DataFrame once at import time; show_explore_page reads it.
df = load_data()

def show_explore_page():
    """Render the "Explore Salaries" page from the module-level ``df``.

    Draws three charts:
      * a pie chart of the number of rows per Location,
      * a bar chart of the mean Salary per Location,
      * a line chart of the mean Salary per YearsofExperience.

    The salary column is read as ``Salary`` because ``load_data`` renames
    ``BaseSalary`` to ``Salary`` before returning the frame.
    """
    st.title("Explore Salaries")

    # NOTE(review): the heading below names the Stack Overflow survey while
    # load_data reads "Levels_Fyi_Salary_Data" -- confirm the intended text.
    st.write(
        """
    ### Stack Overflow Developer Survey 2020
    """
    )

    location_counts = df["Location"].value_counts()

    fig1, ax1 = plt.subplots()
    ax1.pie(
        location_counts,
        labels=location_counts.index,
        autopct="%1.1f%%",
        shadow=True,
        startangle=90,
    )
    ax1.axis("equal")  # Equal aspect ratio ensures that pie is drawn as a circle.

    st.write("""#### Number of Data from different countries""")

    st.pyplot(fig1)
    # Release the figure once it has been rendered; otherwise pyplot keeps a
    # reference to a new figure for every rerun of the page.
    plt.close(fig1)

    st.write(
        """
    #### Mean Salary Based On Location
    """
    )

    salary_by_location = (
        df.groupby(["Location"])["Salary"].mean().sort_values(ascending=True)
    )
    st.bar_chart(salary_by_location)

    st.write(
        """
    #### Mean Salary Based On Experience
    """
    )

    salary_by_experience = (
        df.groupby(["YearsofExperience"])["Salary"].mean().sort_values(ascending=True)
    )
    st.line_chart(salary_by_experience)