In [1]:
# Install the dependencies used by the rest of this notebook.
# %pip (instead of !pip) installs into the environment of the running kernel.
# NOTE(review): versions are unpinned; pin them once a known-good combination is established.
%pip install -q streamlit
# localtunnel is used by the final cell to expose the local app port.
!npm install -g localtunnel


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.8/9.8 MB[0m [31m22.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m23.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25h[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K
added 22 packages in 4s
[1G[0K⠙[1G[0K
[1G[0K⠙[1G[0K3 packages are looking for funding
[1G[0K⠙[1G[0K  run `npm fund` for details
[1G[0K⠙[1G[0K

In [7]:
%%writefile app.py

import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re

# Use Streamlit's "wide" page layout for the whole app.
# NOTE(review): Streamlit has historically required set_page_config to run
# before any other st.* command — keep it ahead of the rest of the script.
st.set_page_config(layout="wide")

# Load and clean the dataset
@st.cache_data
def load_and_clean_data():
    """Download the job-roles CSV and return it with numeric salary/date columns.

    Steps performed:
      * read the CSV from the hard-coded GitHub URL;
      * strip surrounding whitespace from the column names;
      * convert ``Salary`` and ``Date`` to numbers (see the nested helpers);
      * replace values that could not be parsed with ``0``;
      * rename ``Salary`` -> ``Salary (in Thousands)`` and
        ``Date`` -> ``Date (Days)``.

    The function is wrapped in ``st.cache_data``.

    Returns:
        pandas.DataFrame: the cleaned dataset.
    """
    url = "https://raw.githubusercontent.com/cbrown-clu/class_data/refs/heads/main/data/DS_job_roles_UK.csv"
    df = pd.read_csv(url)
    df.columns = df.columns.str.strip()

    def clean_salary(salary):
        """Turn one raw salary cell into a float, or NaN when it cannot be parsed.

        Parenthesised notes, the characters ``£``, ``,`` and ``K``, and the
        ``FCFA`` + non-breaking-space prefix are removed first.  A value
        containing ``-`` is treated as a ``low-high`` range and replaced by
        the midpoint of its two bounds.
        """
        text = re.sub(r'\(.*?\)', '', str(salary))  # Remove parentheses and contents
        # Remove symbols including currency symbols
        text = re.sub(r'[£,K]|FCFA\xa0', '', text)
        text = text.strip()  # Remove leading/trailing spaces

        try:
            if '-' in text:
                # Bounds are parsed with float (not int) so that decimal
                # ranges such as "45.5-60" are averaged instead of being
                # discarded.  A wrong number of parts or an empty part
                # (e.g. a leading minus sign) raises ValueError below.
                low, high = (float(part) for part in text.split('-'))
                return (low + high) / 2
            return float(text)
        except ValueError:
            return float('nan')  # Unparseable value or malformed range

    def clean_date(date_str):
        """Extract the digits of one raw date cell as an int, or NaN if there are none.

        NOTE(review): every non-digit character is dropped, so any unit
        suffix is lost (e.g. "24h" and "24d" both become 24) even though the
        column is later labelled "Date (Days)" — confirm against the data.
        """
        digits = re.sub(r'[^\d]', '', str(date_str))  # Remove non-digit characters
        if digits == '':
            return float('nan')
        try:
            return int(digits)
        except ValueError:
            return float('nan')

    # Values that could not be parsed are stored as 0 rather than NaN.
    df['Salary'] = df['Salary'].apply(clean_salary).fillna(0)
    df['Date'] = df['Date'].apply(clean_date).fillna(0)

    return df.rename(columns={'Salary': 'Salary (in Thousands)', 'Date': 'Date (Days)'})

# Fetch the cleaned dataset.
df = load_and_clean_data()

# ---- Sidebar controls ----
sidebar = st.sidebar
sidebar.header("Interactive Elements")

# Offer every distinct company; the first three encountered are pre-selected.
company_options = df['Company'].unique()
selected_companies = sidebar.multiselect(
    "Select Companies",
    company_options,
    default=company_options[:3],
)

# Keep only the rows that belong to the chosen companies.
company_mask = df['Company'].isin(selected_companies)
filtered_df = df[company_mask]

# Company Score Plot

st.header("Company Scores")

if len(selected_companies) == 0:
    # With nothing selected the slider range would be 1..0 and there would be
    # no data to draw, so ask for a selection instead of plotting.
    st.info("Select at least one company in the sidebar to see company scores.")
else:
    # Slider for number of companies to display.  With a single company there
    # is no range to choose from, so the slider is skipped.
    if len(selected_companies) > 1:
        num_companies_to_display = st.slider(
            "Number of Companies to Display",
            1,
            len(selected_companies),
            len(selected_companies),
        )
    else:
        num_companies_to_display = 1

    # Mean score per company, highest first, limited to the requested count.
    plot_df = filtered_df.groupby('Company')['Company Score'].mean().reset_index()
    plot_df = plot_df.sort_values(by=['Company Score'], ascending=False).head(num_companies_to_display)

    fig, ax = plt.subplots(figsize=(8, 4))  # Adjust figsize to reduce size
    sns.barplot(x='Company', y='Company Score', data=plot_df, ax=ax)
    # Rotate x-axis labels for better readability (set on this Axes explicitly
    # rather than through pyplot's "current axes").
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    st.pyplot(fig)
    # The script is re-executed on every interaction; close the figure so
    # pyplot does not keep one more open figure per rerun.
    plt.close(fig)


# Interactive Table
st.header("Interactive Data Table")

# Number of rows to display.  The default of 10 is clamped to the number of
# rows available, and the slider is skipped when there are fewer than two rows
# (no meaningful range to choose from).
total_rows = len(df)
if total_rows > 1:
    num_rows = st.slider("Number of Rows to Display", 1, total_rows, min(10, total_rows))
else:
    num_rows = total_rows

# Columns to display (plain lists rather than pandas Index objects)
all_columns = list(df.columns)
columns_to_display = st.multiselect(
    "Select Columns to Display",
    all_columns,
    default=all_columns[:5],  # Default to first 5 columns
)

# Display the table
st.dataframe(df[columns_to_display].head(num_rows))

Overwriting app.py


In [3]:
# Start the Streamlit app in the background and send both stdout and stderr to
# /content/logs.txt.  The POSIX form `> file 2>&1` is used instead of the
# bash-only `&>` so the redirect also works when the command runs under sh.
!streamlit run app.py >/content/logs.txt 2>&1 &


In [4]:
import socket
import time

# Wait for the backgrounded Streamlit server to accept connections instead of
# sleeping for a fixed interval: this returns as soon as the port is open and
# reports a problem if it never opens.
STREAMLIT_PORT = 8501  # same port the localtunnel cell forwards
STARTUP_TIMEOUT_S = 30
POLL_INTERVAL_S = 0.5

deadline = time.monotonic() + STARTUP_TIMEOUT_S
while True:
    try:
        # A successful TCP connect means something is listening on the port.
        with socket.create_connection(("localhost", STREAMLIT_PORT), timeout=1):
            break
    except OSError:
        if time.monotonic() >= deadline:
            # Keep going (as the fixed sleep did) but make the failure visible.
            print(
                f"Nothing is listening on port {STREAMLIT_PORT} after "
                f"{STARTUP_TIMEOUT_S}s; check /content/logs.txt"
            )
            break
        time.sleep(POLL_INTERVAL_S)


In [5]:
# `import urllib` alone does not guarantee that the `urllib.request` submodule
# is loaded, so import it explicitly.
import urllib.request

# Look up this machine's public IPv4 address; it is printed below as the
# password for localtunnel.
# The response is closed via the context manager, and a timeout keeps the cell
# from hanging indefinitely if the lookup service is unreachable.
with urllib.request.urlopen('https://ipv4.icanhazip.com', timeout=10) as response:
    public_ip = response.read().decode('utf8').strip("\n")

print("Password for localtunnel is:", public_ip)

Password for localtunnel is: 34.82.176.133


In [6]:
# Expose local port 8501 through localtunnel; the public URL is printed in the
# cell output ("your url is: ...").
# NOTE(review): 8501 is presumably the port the Streamlit server started in the
# earlier cell listens on (no --server.port is passed there) — confirm.
# The recorded run of this cell was ended with an interrupt (^C).
!npx localtunnel --port 8501



[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0Kyour url is: https://five-things-love.loca.lt
^C
