# Task 1: Environment Setup
> Install and import the required libraries (pandas, streamlit, pyngrok, plotly.express).

In [1]:
# Install Streamlit and pyngrok
!pip install streamlit -q
!pip install pyngrok -q

# Import libraries
import streamlit as st
import pandas as pd
import plotly.express as px
from pyngrok import ngrok
import os
import subprocess
import time


[notice] A new release of pip is available: 25.0.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 25.0.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


# Task 2: Load and Explore Dataset
> Load the Global Land Temperatures dataset into Pandas.

In [2]:
# Raw CSV of the Global Land Temperatures dataset, hosted on GitHub
DATA_URL = "https://raw.githubusercontent.com/Steven-Alvarado/Global-Temperature-Analysis/refs/heads/main/GlobalTemperatures.csv"

df = pd.read_csv(DATA_URL)
df.head()

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty
0,1850-01-01,0.749,1.105,8.242,1.738,-3.206,2.822,12.833,0.367
1,1850-02-01,3.071,1.275,9.97,3.007,-2.291,1.623,13.588,0.414
2,1850-03-01,4.954,0.955,10.347,2.401,-1.905,1.41,14.043,0.341
3,1850-04-01,7.217,0.665,12.934,1.004,1.018,1.329,14.667,0.267
4,1850-05-01,10.004,0.617,15.655,2.406,3.811,1.347,15.507,0.249


In [9]:
# List the column labels to confirm which fields the CSV provides
df.columns

Index(['dt', 'LandAverageTemperature', 'LandAverageTemperatureUncertainty',
       'LandMaxTemperature', 'LandMaxTemperatureUncertainty',
       'LandMinTemperature', 'LandMinTemperatureUncertainty',
       'LandAndOceanAverageTemperature',
       'LandAndOceanAverageTemperatureUncertainty'],
      dtype='object')

In [None]:
# Peek at the date range covered by the dataset WITHOUT mutating `df`.
# The permanent datetime conversion belongs to the data-cleaning step; keeping
# this cell read-only means the dtype reported by the following df.info()
# does not depend on whether this cell was executed.
dt_parsed = pd.to_datetime(df['dt'])
dt_parsed.min().date(), dt_parsed.max().date()

(datetime.date(1850, 1, 1), datetime.date(2015, 12, 1))

In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
df.describe()

Unnamed: 0,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty
count,1992.0,1992.0,1992.0,1992.0,1992.0,1992.0,1992.0,1992.0
mean,8.571583,0.276663,14.350601,0.479782,2.743595,0.431849,15.212566,0.128532
std,4.263193,0.22403,4.309579,0.583203,4.155835,0.445838,1.274093,0.073587
min,0.404,0.034,5.9,0.044,-5.407,0.045,12.475,0.042
25%,4.43,0.09975,10.212,0.142,-1.3345,0.155,14.047,0.063
50%,8.8505,0.23,14.76,0.252,2.9495,0.279,15.251,0.122
75%,12.8585,0.34725,18.4515,0.539,6.77875,0.45825,16.39625,0.151
max,15.482,1.492,21.32,4.373,9.715,3.498,17.611,0.457


In [11]:
# Column dtypes, non-null counts and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1992 entries, 0 to 1991
Data columns (total 9 columns):
 #   Column                                     Non-Null Count  Dtype  
---  ------                                     --------------  -----  
 0   dt                                         1992 non-null   object 
 1   LandAverageTemperature                     1992 non-null   float64
 2   LandAverageTemperatureUncertainty          1992 non-null   float64
 3   LandMaxTemperature                         1992 non-null   float64
 4   LandMaxTemperatureUncertainty              1992 non-null   float64
 5   LandMinTemperature                         1992 non-null   float64
 6   LandMinTemperatureUncertainty              1992 non-null   float64
 7   LandAndOceanAverageTemperature             1992 non-null   float64
 8   LandAndOceanAverageTemperatureUncertainty  1992 non-null   float64
dtypes: float64(8), object(1)
memory usage: 140.2+ KB


In [12]:
# Count the missing values in each column
df.isnull().sum()

dt                                           0
LandAverageTemperature                       0
LandAverageTemperatureUncertainty            0
LandMaxTemperature                           0
LandMaxTemperatureUncertainty                0
LandMinTemperature                           0
LandMinTemperatureUncertainty                0
LandAndOceanAverageTemperature               0
LandAndOceanAverageTemperatureUncertainty    0
dtype: int64

# Task 3: Data Cleaning

> Prepare dataset for visualization.

In [3]:
# Parse the `dt` strings into datetime64 values, then confirm the new dtype
df = df.assign(dt=pd.to_datetime(df['dt']))
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1992 entries, 0 to 1991
Data columns (total 9 columns):
 #   Column                                     Non-Null Count  Dtype         
---  ------                                     --------------  -----         
 0   dt                                         1992 non-null   datetime64[ns]
 1   LandAverageTemperature                     1992 non-null   float64       
 2   LandAverageTemperatureUncertainty          1992 non-null   float64       
 3   LandMaxTemperature                         1992 non-null   float64       
 4   LandMaxTemperatureUncertainty              1992 non-null   float64       
 5   LandMinTemperature                         1992 non-null   float64       
 6   LandMinTemperatureUncertainty              1992 non-null   float64       
 7   LandAndOceanAverageTemperature             1992 non-null   float64       
 8   LandAndOceanAverageTemperatureUncertainty  1992 non-null   float64       
dtypes: datetime64[ns](1)

In [14]:
# Preview the first rows after the datetime conversion
df.head()


Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty
0,1850-01-01,0.749,1.105,8.242,1.738,-3.206,2.822,12.833,0.367
1,1850-02-01,3.071,1.275,9.97,3.007,-2.291,1.623,13.588,0.414
2,1850-03-01,4.954,0.955,10.347,2.401,-1.905,1.41,14.043,0.341
3,1850-04-01,7.217,0.665,12.934,1.004,1.018,1.329,14.667,0.267
4,1850-05-01,10.004,0.617,15.655,2.406,3.811,1.347,15.507,0.249


In [87]:
# Distinct calendar months present in the data, in ascending order
unique_months = (
    df['dt'].dt.month
    .drop_duplicates()
    .sort_values()
    .to_numpy()
)
unique_months

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

In [None]:
# 

# Task 4: Write the Streamlit App

In [4]:
%%writefile app.py
import streamlit as st
import pandas as pd 
import plotly.express as px
from PIL import Image
import requests
from io import BytesIO

# Use the wide layout so page content spans the full browser width.
# This call is placed ahead of every other st.* call in the script.
st.set_page_config(layout="wide")

@st.cache_data
def load_data():
    """Download the Global Land Temperatures CSV and prepare it for plotting.

    Streamlit re-executes this script on every widget interaction, so the
    result is cached with ``st.cache_data``; without it the CSV would be
    downloaded again each time a sidebar control changes.

    Returns:
        pandas.DataFrame: the dataset with ``dt`` parsed to datetime and the
        four ``*Uncertainty`` columns removed.
    """
    df = pd.read_csv("https://raw.githubusercontent.com/Steven-Alvarado/Global-Temperature-Analysis/refs/heads/main/GlobalTemperatures.csv")
    df['dt'] = pd.to_datetime(df['dt'])

    # The dashboard only plots the temperature readings themselves.
    uncertainty_columns = [
        'LandAverageTemperatureUncertainty',
        'LandMaxTemperatureUncertainty',
        'LandMinTemperatureUncertainty',
        'LandAndOceanAverageTemperatureUncertainty',
    ]
    # Return a new frame instead of dropping in place.
    return df.drop(columns=uncertainty_columns)

def plot_data(data, x, y, title, chart_type='line', color=None):
    """Render one Plotly Express chart under a Streamlit subheader.

    Args:
        data: DataFrame holding the columns named by ``x`` and ``y``.
        x: Column used for the x-axis; its axis label is shown as "Date".
        y: Column used for the y-axis.
        title: Text for the subheader displayed above the chart.
        chart_type: One of 'line', 'scatter', 'area' or 'bar'.
        color: Optional colour applied to the single trace; when falsy,
            Plotly's default colour sequence is used.

    Raises:
        ValueError: If ``chart_type`` is not one of the supported kinds.
    """
    chart_builders = {
        'line': px.line,
        'scatter': px.scatter,
        'area': px.area,
        'bar': px.bar,
    }

    # Validate before drawing anything: previously an unknown chart type left
    # `fig` unassigned and failed with an UnboundLocalError after the
    # subheader had already been written.
    if chart_type not in chart_builders:
        supported = ", ".join(sorted(chart_builders))
        raise ValueError(
            f"Unsupported chart_type {chart_type!r}; expected one of: {supported}"
        )

    st.subheader(title)
    fig = chart_builders[chart_type](
        data,
        x=x,
        y=y,
        labels={x: 'Date'},  # alias for the x-axis label
        color_discrete_sequence=[color] if color else None,
    )
    st.plotly_chart(fig)

def aggregate_by_timeframe(df, time_frame):
    """Average the temperature columns per calendar month or per year.

    Args:
        df: DataFrame with a datetime ``dt`` column and the four temperature
            columns. It is never modified; grouping works on a copy.
        time_frame: 'Monthly' or 'Yearly' to aggregate. 'Daily' and any other
            value return ``df`` itself, untouched.

    Returns:
        For 'Monthly' / 'Yearly': a new DataFrame holding the grouping key
        ('year_month' or 'year'), the mean of each temperature column, and a
        ``dt`` column set to the start of each period. Otherwise ``df``.
    """
    # Every temperature column is reduced with the same statistic.
    mean_of = dict.fromkeys(
        (
            'LandAverageTemperature',
            'LandMaxTemperature',
            'LandMinTemperature',
            'LandAndOceanAverageTemperature',
        ),
        'mean',
    )

    if time_frame == 'Monthly':
        bucketed = df.copy()
        bucketed['year_month'] = bucketed['dt'].dt.to_period('M')
        monthly = bucketed.groupby('year_month').agg(mean_of).reset_index()
        # Periods are turned back into timestamps so they can be plotted
        monthly['dt'] = monthly['year_month'].dt.to_timestamp()
        return monthly

    if time_frame == 'Yearly':
        bucketed = df.copy()
        bucketed['year'] = bucketed['dt'].dt.year
        yearly = bucketed.groupby('year').agg(mean_of).reset_index()
        # Integer years are turned back into timestamps so they can be plotted
        yearly['dt'] = pd.to_datetime(yearly['year'], format='%Y')
        return yearly

    # 'Daily' (no aggregation) and the fallback for unrecognised values
    return df

def create_sidebar(df):
    """Build the sidebar filters and render the four temperature charts.

    The sidebar collects a date range, an aggregation time frame, a chart
    type and a colour. The data is then filtered, aggregated and plotted in
    the main page area.

    Args:
        df: DataFrame with a datetime ``dt`` column and the four temperature
            columns plotted below.

    Returns:
        tuple: ``(start_date, end_date)`` as chosen in the sidebar.
    """
    st.sidebar.title("Filters")
    st.sidebar.write("Use the controls to filter the data.")

    # The dataset bounds serve as defaults and as limits for both pickers.
    min_date = df['dt'].min().date()
    max_date = df['dt'].max().date()

    start_date = st.sidebar.date_input("Start Date", min_date, min_value=min_date, max_value=max_date)
    end_date = st.sidebar.date_input("End Date", max_date, min_value=min_date, max_value=max_date)
    # "Daily" means no aggregation: rows are plotted as loaded.
    time_frame = st.sidebar.selectbox("Select Time Frame", ["Daily", "Monthly", "Yearly"])
    chart_type = st.sidebar.radio("Select chart type", ["line", "scatter", "area", "bar"])
    color = st.sidebar.color_picker("Pick a color for the plots", "#FF5733")

    # An inverted range would silently produce four empty charts; say why instead.
    if start_date > end_date:
        st.sidebar.error("Start Date must be on or before End Date.")
        return start_date, end_date

    # Filter by dates (the date view of the column is computed once)
    row_dates = df['dt'].dt.date
    filtered_data = df[(row_dates >= start_date) & (row_dates <= end_date)]

    # Apply time frame aggregation
    aggregated_data = aggregate_by_timeframe(filtered_data, time_frame)

    st.write(f"Data from **{start_date}** to **{end_date}** (Time frame: **{time_frame}**)")

    # Four plots stacked below each other, one per y-axis column
    charts = [
        ('LandAverageTemperature', 'Land Average Temperature'),
        ('LandMaxTemperature', 'Land Max Temperature'),
        ('LandMinTemperature', 'Land Min Temperature'),
        ('LandAndOceanAverageTemperature', 'Land and Ocean Average Temperature'),
    ]
    for column, chart_title in charts:
        plot_data(aggregated_data, 'dt', column, chart_title, chart_type, color)

    return start_date, end_date

# Display the main title - ensuring it takes full width
st.title("Global Land Temperatures Explorer")

# Display colorful weather logo from URL in sidebar only.
# The logo is decorative, so any failure is reported in the sidebar and the
# rest of the dashboard still renders.
logo_url = "https://cdn-icons-png.flaticon.com/512/4052/4052984.png"  # Colorful weather/climate logo
try:
    # Without a timeout an unresponsive host would block the script, and the
    # script runs again on every widget interaction.
    response = requests.get(logo_url, timeout=10)
    # Surface HTTP errors here rather than passing an error page to PIL.
    response.raise_for_status()
    logo = Image.open(BytesIO(response.content))
    st.sidebar.image(logo, width=150, caption="Global Weather Data")
except Exception as e:
    st.sidebar.error(f"Could not load logo: {e}")

st.write("Interactive dashboard to explore climate trends.")

# Load data
df = load_data()

# Sidebar filters + charts
create_sidebar(df)

Writing app.py


# Task 5: Run and Deploy the App with ngrok

In [5]:
import getpass
import os
import subprocess
import time

from IPython.display import display, IFrame
import pyngrok.ngrok as ngrok

# SECURITY: never hardcode the ngrok authtoken in the notebook. It is read from
# the NGROK_AUTHTOKEN environment variable, with an interactive (hidden) prompt
# as a fallback. Any token that was ever saved in this notebook should be
# revoked in the ngrok dashboard and replaced.
authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass.getpass("ngrok authtoken: ")

if authtoken:
    process = None
    try:
        # Kill any existing ngrok tunnels
        ngrok.kill()

        # Set the authtoken
        ngrok.set_auth_token(authtoken)

        # Start Streamlit app
        process = subprocess.Popen(["streamlit", "run", "app.py"])
        print("Starting Streamlit server in the background...")

        # Wait for the app to initialize
        time.sleep(5)

        # Open ngrok tunnel (no custom subdomain to avoid ERR_NGROK_313).
        # ngrok.connect returns an NgrokTunnel object; the URL string that the
        # IFrame needs is its `public_url` attribute.
        tunnel = ngrok.connect(addr='8501')
        public_url = tunnel.public_url
        print(f"Streamlit App URL: {public_url}")
        print("The dashboard is now running below. It may take a moment to load.")

        # Display the app in an IFrame
        display(IFrame(public_url, width='100%', height=800))

    except Exception as e:
        print(f"An error occurred: {e}")
        print("Please ensure your authtoken is correct and that Streamlit is installed.")
        # Do not leave an orphaned Streamlit server behind when setup fails.
        if process is not None and process.poll() is None:
            process.terminate()
else:
    print("Please enter your ngrok authtoken to run the app.")

Starting Streamlit server in the background...
Streamlit App URL: NgrokTunnel: "https://unauthorized-joeann-mutably.ngrok-free.dev" -> "http://localhost:8501"
The dashboard is now running below. It may take a moment to load.
