!pip install flask pandas matplotlib flask_cors seaborn catboost

In [2]:
import base64
from functools import lru_cache
from io import BytesIO
import pickle
from threading import Thread

from flask import Flask, request, jsonify, render_template, send_file
from flask_cors import CORS
import matplotlib
matplotlib.use('Agg')  # Set backend before importing pyplot
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns


# Create the Flask application object that all routes below are registered on.
app = Flask(__name__)
# Wrap the app with flask_cors (default settings) so cross-origin requests are allowed.
CORS(app) 

@app.route('/')
def home():
    """Serve the landing page by rendering the ``index.html`` template."""
    return render_template('index.html')

@lru_cache(maxsize=1)
def _load_cat_model():
    """Unpickle the model stored in ``catBoost_model`` and cache it.

    The file is read on the first call only; later calls reuse the same
    object, so a model file replaced on disk is picked up after a restart.
    A failed load is not cached and is retried on the next call.
    """
    # NOTE: pickle.load executes arbitrary code from the file; this path must
    # only ever point at a trusted, locally produced model.
    with open('catBoost_model', 'rb') as f:
        return pickle.load(f)


@app.route('/predict', methods=['POST'])
def receive_data():
    """Run the pickled model on the feature values posted as JSON.

    The request body must be a non-empty JSON object. Its values are
    converted to floats in the object's key order and passed to the model as
    a single row, so the client must send the features in the order the
    model expects.

    Returns:
        200 with ``{"message": ..., "data": [...]}`` on success,
        400 with ``{"error": ...}`` when the body is missing, is not a JSON
        object, or contains a non-numeric value,
        500 with ``{"error": ...}`` when loading or running the model fails.
    """
    # silent=True yields None for a missing/invalid JSON body instead of raising
    data = request.get_json(silent=True)
    print("Received data:", data)

    if not isinstance(data, dict) or not data:
        return jsonify({"error": "Request body must be a non-empty JSON object"}), 400

    try:
        float_list = [float(value) for value in data.values()]
    except (TypeError, ValueError) as e:
        return jsonify({"error": f"All feature values must be numeric: {e}"}), 400
    print("Converted float list:", float_list)

    try:
        cat_model = _load_cat_model()
        prediction = cat_model.predict([float_list])
        print("Prediction:", prediction)

        response = {
            "message": "Data received successfully",
            "data": prediction.tolist()
        }
        return jsonify(response)

    except Exception as e:
        # Top-level boundary: report model/loading failures to the client as a 500
        return jsonify({"error": str(e)}), 500
    
def _figure_to_base64(fig, dpi=150):
    """Render *fig* as a PNG, close it, and return the image as base64 text.

    Args:
        fig: The matplotlib figure to render.
        dpi: Resolution passed to ``savefig``; ``None`` omits the argument so
            matplotlib's default save resolution is used.

    The figure is closed even when saving fails, so a failed request does not
    leave it registered with pyplot.
    """
    buf = BytesIO()
    try:
        if dpi is None:
            fig.savefig(buf, format='png')
        else:
            fig.savefig(buf, format='png', dpi=dpi)
    finally:
        plt.close(fig)
    return base64.b64encode(buf.getvalue()).decode('utf-8')


@app.route('/cleaned-data-analysis')
def show_graphs_cleaned():
    """Render the analysis page for ``final_cleaned_data.csv``.

    Reads the CSV once, draws each plot in turn, encodes every plot as a
    base64 PNG string and passes the list to ``cleaned_data_analysis.html``
    as ``graphs`` (in the order the plots are drawn below).
    """
    import calendar

    graphs = []

    # Read the dataset once. `df` itself is never modified below; sections that
    # need extra columns work on derived frames created with assign().
    df = pd.read_csv('final_cleaned_data.csv')

    # Frames with a parsed Date, shared by the time-based plots
    dated = df.assign(Date=pd.to_datetime(df['Date']))
    by_date = dated.sort_values(by='Date').set_index('Date')

    # Count plot of the target
    fig = plt.figure(figsize=(8, 4))
    sns.countplot(data=df, x='RainTomorrow', palette='Set2')
    plt.title('Rain vs No Rain Distribution')
    plt.xlabel('Rain Tomorrow')
    plt.ylabel('Count')
    graphs.append(_figure_to_base64(fig))

    # Histogram of rainfall
    fig = plt.figure(figsize=(8, 5))
    sns.histplot(df['Rainfall'].dropna(), bins=30, kde=True, color='skyblue')
    plt.title('Histogram of Rainfall Distribution')
    plt.xlabel('Rainfall (mm)')
    plt.ylabel('Frequency')
    plt.grid(True)
    graphs.append(_figure_to_base64(fig))

    # Boxplot of rainfall by target
    fig = plt.figure(figsize=(7, 5))
    sns.boxplot(x='RainTomorrow', y='Rainfall',
                data=df[['Rainfall', 'RainTomorrow']].dropna(), palette='Set3')
    plt.title('Boxplot of Rainfall by RainTomorrow')
    plt.xlabel('Rain Tomorrow')
    plt.ylabel('Rainfall (mm)')
    plt.grid(True)
    graphs.append(_figure_to_base64(fig))

    # Histogram of max temperature
    fig = plt.figure(figsize=(8, 5))
    sns.histplot(df['MaxTemp'].dropna(), bins=30, kde=True, color='coral')
    plt.title('Histogram of Max Temperature Distribution')
    plt.xlabel('Max Temperature (°C)')
    plt.ylabel('Frequency')
    plt.grid(True)
    graphs.append(_figure_to_base64(fig))

    # Correlation heatmap of the numeric columns
    fig = plt.figure(figsize=(12, 12))
    sns.heatmap(df.corr(numeric_only=True), annot=True, fmt=".2f",
                cmap="coolwarm", linewidths=0.5)
    plt.title("Correlation Heatmap of Numerical Features")
    graphs.append(_figure_to_base64(fig))

    # Pairplot of key features. sns.pairplot creates its own figure, which is
    # taken from the returned PairGrid. This image used to be encoded and then
    # discarded; it is now added to the page like every other plot.
    features = ['Rainfall', 'MinTemp', 'MaxTemp', 'Humidity9am', 'Humidity3pm']
    pairgrid = sns.pairplot(df[features].dropna())
    fig = pairgrid.fig
    fig.suptitle("Pairplot of Key Weather Features", y=1.02)
    graphs.append(_figure_to_base64(fig, dpi=None))

    # Violin plot of humidity at 3 PM by target
    fig = plt.figure(figsize=(10, 7))
    sns.violinplot(x='RainTomorrow', y='Humidity3pm', data=df)
    plt.title('Violin Plot of Humidity at 3 PM by Rain Tomorrow')
    plt.xlabel('Rain Tomorrow')
    plt.ylabel('Humidity at 3 PM (%)')
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    graphs.append(_figure_to_base64(fig))

    # Histogram of wind gust speed (25 bins)
    fig = plt.figure(figsize=(8, 5))
    sns.histplot(df['WindGustSpeed'].dropna(), bins=25, kde=True, color='skyblue')
    plt.title('Histogram of Wind Gust Speed')
    plt.xlabel('Wind Gust Speed (km/h)')
    plt.ylabel('Frequency')
    plt.grid(True)
    graphs.append(_figure_to_base64(fig))

    # Scatter of max temperature vs rainfall
    temp_rain = df[['MaxTemp', 'Rainfall']].dropna()
    fig, ax = plt.subplots()
    ax.scatter(temp_rain['MaxTemp'], temp_rain['Rainfall'])
    ax.set_title('Max Temperature vs Rainfall')
    ax.set_xlabel('Max Temperature (°C)')
    ax.set_ylabel('Rainfall (mm)')
    graphs.append(_figure_to_base64(fig))

    # Monthly average min/max temperature.
    # NOTE(review): newer pandas releases spell the month-end alias 'ME';
    # 'M' is kept here so the code keeps working on the pandas version in use.
    df_monthly = by_date.resample('M')[['MinTemp', 'MaxTemp']].mean().reset_index()
    fig = plt.figure(figsize=(14, 7))
    sns.lineplot(data=df_monthly, x='Date', y='MinTemp', label='Min Temperature')
    sns.lineplot(data=df_monthly, x='Date', y='MaxTemp', label='Max Temperature')
    plt.title('Monthly Average Min and Max Temperature Trends Over Time')
    plt.xlabel('Date')
    plt.ylabel('Temperature (°C)')
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    graphs.append(_figure_to_base64(fig))

    # Wind gust direction vs target, most frequent direction first
    fig = plt.figure(figsize=(12, 7))
    sns.countplot(x='WindGustDir', hue='RainTomorrow', data=df, palette='viridis',
                  order=df['WindGustDir'].value_counts().index)
    plt.title('Wind Gust Direction vs Rain Tomorrow')
    plt.xlabel('Wind Gust Direction')
    plt.ylabel('Count')
    plt.xticks(rotation=45)  # Rotate x-axis labels for better readability
    plt.legend(title='Rain Tomorrow')
    plt.tight_layout()
    graphs.append(_figure_to_base64(fig))

    # Wind speed categories vs target.
    # Bins are left-closed: [0, 20), [20, 40), [40, 60), [60, inf).
    # The open-ended last edge keeps the fastest gusts in 'Very Strong'; using
    # the column maximum as the edge excluded the maximum itself (right=False)
    # and produced invalid bins whenever the maximum was not above 60.
    bins = [0, 20, 40, 60, float('inf')]
    labels = ['Light (< 20 km/h)', 'Moderate (20 - 40 km/h)', 'Strong (40 - 60 km/h)', 'Very Strong(> 60 km/h)']
    wind = df.assign(
        WindSpeedCategory=pd.cut(df['WindGustSpeed'], bins=bins, labels=labels, right=False)
    )
    fig = plt.figure(figsize=(12, 7))
    sns.countplot(x='WindSpeedCategory', hue='RainTomorrow', data=wind, palette='viridis', order=labels)
    plt.title('Wind Speed Categories vs Rain Tomorrow')
    plt.xlabel('Wind Speed Category')
    plt.ylabel('Count')
    plt.legend(title='Rain Tomorrow')
    plt.tight_layout()
    graphs.append(_figure_to_base64(fig))

    # Distribution of wind gust speed (30 bins)
    fig = plt.figure(figsize=(10, 6))
    sns.histplot(df['WindGustSpeed'], bins=30, kde=True)
    plt.title('Distribution of WindGustSpeed')
    plt.xlabel('Wind Gust Speed (km/h)')
    plt.ylabel('Frequency')
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    graphs.append(_figure_to_base64(fig))

    # Boxplot of pressure at 3 PM by target
    fig = plt.figure(figsize=(8, 6))
    sns.boxplot(x='RainTomorrow', y='Pressure3pm', data=df, palette='viridis')
    plt.title('Boxplot of Pressure at 3 PM by Rain Tomorrow')
    plt.xlabel('Rain Tomorrow')
    plt.ylabel('Pressure at 3 PM (hPa)')
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    graphs.append(_figure_to_base64(fig))

    # Monthly total rainfall (same 'M' alias note as above)
    df_monthly_rainfall = by_date['Rainfall'].resample('M').sum().reset_index()
    fig = plt.figure(figsize=(14, 7))
    sns.lineplot(data=df_monthly_rainfall, x='Date', y='Rainfall')
    plt.title('Monthly Total Rainfall Over Time')
    plt.xlabel('Date')
    plt.ylabel('Total Rainfall (mm)')
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    graphs.append(_figure_to_base64(fig))

    # Distribution of minimum temperature
    fig = plt.figure(figsize=(10, 6))
    sns.histplot(df['MinTemp'], bins=30, kde=True)
    plt.title('Distribution of Minimum Temperature')
    plt.xlabel('Minimum Temperature (°C)')
    plt.ylabel('Frequency')
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    graphs.append(_figure_to_base64(fig))

    # KDE of humidity at 9 AM vs 3 PM
    fig = plt.figure(figsize=(10, 6))
    sns.kdeplot(df['Humidity9am'], fill=True, label='Humidity 9 AM')
    sns.kdeplot(df['Humidity3pm'], fill=True, label='Humidity 3 PM')
    plt.title('KDE Plot of Humidity (9 AM vs 3 PM)')
    plt.xlabel('Humidity (%)')
    plt.ylabel('Density')
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    graphs.append(_figure_to_base64(fig))

    # Strip plot of sunshine vs target
    fig = plt.figure(figsize=(10, 7))
    sns.stripplot(x='RainTomorrow', y='Sunshine', data=df, jitter=True, palette='viridis', alpha=0.6)
    plt.title('Strip Plot of Sunshine vs Rain Tomorrow')
    plt.xlabel('Rain Tomorrow')
    plt.ylabel('Sunshine (hours)')
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    graphs.append(_figure_to_base64(fig))

    # Calendar-style heatmap: mean rainfall per (month, day of week)
    calendar_frame = dated.assign(
        Month=dated['Date'].dt.month,
        DayOfWeek=dated['Date'].dt.dayofweek,  # Monday=0, Sunday=6
    )
    average_rainfall_by_month_day = (
        calendar_frame.groupby(['Month', 'DayOfWeek'])['Rainfall'].mean().unstack()
    )
    # Label only the months/days that actually occur in the data, so a dataset
    # that lacks some month or weekday no longer fails with a length mismatch.
    average_rainfall_by_month_day.index = [
        calendar.month_abbr[int(m)] for m in average_rainfall_by_month_day.index
    ]
    average_rainfall_by_month_day.columns = [
        calendar.day_abbr[int(d)] for d in average_rainfall_by_month_day.columns
    ]
    fig = plt.figure(figsize=(12, 8))
    sns.heatmap(average_rainfall_by_month_day, cmap='Blues', annot=True, fmt=".2f",
                linewidths=.5, cbar_kws={'label': 'Average Rainfall (mm)'})
    plt.title('Calendar-Style Heatmap of Average Daily Rainfall')
    plt.xlabel('Day of Week')
    plt.ylabel('Month')
    plt.tight_layout()
    graphs.append(_figure_to_base64(fig))

    # Hand the encoded images to the template
    return render_template('cleaned_data_analysis.html', graphs=graphs)

@app.route('/raw-data-analysis')
def show_graphs_raw():
    """Render the analysis page for the raw ``weatherAUS.csv`` dataset.

    Draws ten plots, encodes each one as a base64 PNG string and passes the
    list to ``raw_data_analysis.html`` as ``graphs`` in drawing order.
    """
    encoded_plots = []
    raw = pd.read_csv("weatherAUS.csv")

    def capture(figure, dpi=150):
        # Save the figure as PNG, close it, then store the base64 text
        png = BytesIO()
        figure.savefig(png, format='png', dpi=dpi)
        plt.close(figure)
        encoded_plots.append(base64.b64encode(png.getvalue()).decode('utf-8'))

    # 1. Heatmap of missing values (saved at a higher resolution than the rest)
    canvas = plt.figure(figsize=(16, 12))
    sns.heatmap(raw.isnull(), cbar=False, cmap='viridis')
    plt.title('Missing Data Heatmap')
    capture(canvas, dpi=300)

    # 2. Count of each RainTomorrow value
    canvas = plt.figure(figsize=(6, 4))
    sns.countplot(x='RainTomorrow', data=raw, palette='Set2')
    plt.title('Count Plot of RainTomorrow')
    capture(canvas)

    # 3. Rainfall histogram, x-axis limited to 0-50
    canvas = plt.figure(figsize=(10, 6))
    sns.histplot(raw['Rainfall'], bins=50, kde=False, color='skyblue')
    plt.title('Histogram of Rainfall')
    plt.xlim(0, 50)
    capture(canvas)

    # 4. Rainfall by RainTomorrow, y-axis limited to 0-50
    canvas = plt.figure(figsize=(8, 6))
    sns.boxplot(x='RainTomorrow', y='Rainfall', data=raw, palette='Set3')
    plt.title('Boxplot of Rainfall by RainTomorrow')
    plt.ylim(0, 50)
    capture(canvas)

    # 5. Wind speed at 9am
    canvas = plt.figure(figsize=(10, 6))
    sns.histplot(raw['WindSpeed9am'], bins=30, kde=False, color='cornflowerblue')
    plt.title('Histogram of Wind Speed at 9am')
    capture(canvas)

    # 6. MaxTemp against MinTemp, coloured by RainTomorrow
    canvas = plt.figure(figsize=(10, 6))
    sns.scatterplot(data=raw, x='MinTemp', y='MaxTemp', hue='RainTomorrow', alpha=0.5)
    plt.title('MaxTemp vs MinTemp colored by RainTomorrow')
    capture(canvas)

    # 7. Humidity at 3pm by RainTomorrow
    canvas = plt.figure(figsize=(8, 6))
    sns.boxplot(x='RainTomorrow', y='Humidity3pm', data=raw, palette='coolwarm')
    plt.title('Boxplot of Humidity at 3pm by RainTomorrow')
    capture(canvas)

    # 8. Sunshine distribution (missing values dropped)
    canvas = plt.figure(figsize=(10, 6))
    sunshine = raw['Sunshine'].dropna()
    sns.histplot(sunshine, bins=30, kde=True, color='goldenrod')
    plt.title('Distribution of Sunshine')
    capture(canvas)

    # 9. Pressure at 9am by RainTomorrow
    canvas = plt.figure(figsize=(8, 6))
    sns.violinplot(x='RainTomorrow', y='Pressure9am', data=raw, palette='Pastel1')
    plt.title('Pressure at 9am by RainTomorrow')
    capture(canvas)

    # 10. Wind gust direction frequency, most common first
    canvas = plt.figure(figsize=(12, 6))
    direction_order = raw['WindGustDir'].value_counts().index
    sns.countplot(y='WindGustDir', data=raw, order=direction_order, palette='Spectral')
    plt.title('Wind Gust Direction Frequency')
    capture(canvas)

    return render_template('raw_data_analysis.html', graphs=encoded_plots)

@app.route('/generate')
def generate():
    """Serve the ``generate.html`` template."""
    return render_template("generate.html")

@app.route('/generated-graph', methods=['POST'])
def generated_graph():
    """Plot one column of ``final_cleaned_data.csv`` for a chosen month.

    Form fields:
        month: Month number, 1-12.
        year: Four-digit year as an integer.
        parameter: Name of the CSV column to plot.

    Returns:
        The rendered ``generated_graph.html`` template with ``graphs`` set to
        a one-element list holding the bar chart as a base64 PNG string, or
        400 with ``{"error": ...}`` when a form field is missing or invalid.
    """
    # Validate the form input before doing any expensive work
    try:
        month = int(request.form.get('month', ''))
        year = int(request.form.get('year', ''))
    except ValueError:
        return jsonify({"error": "'month' and 'year' must be integers"}), 400
    if not 1 <= month <= 12:
        return jsonify({"error": "'month' must be between 1 and 12"}), 400

    df = pd.read_csv("final_cleaned_data.csv")

    parameter = request.form.get('parameter', '')
    if parameter not in df.columns:
        return jsonify({"error": f"Unknown parameter: {parameter!r}"}), 400

    df['Date'] = pd.to_datetime(df['Date'], format='mixed')

    # Keep only the rows that fall in the requested month of the requested year
    in_month = (df['Date'].dt.year == year) & (df['Date'].dt.month == month)
    df_provided_month = df[in_month]

    graphs_generated = []
    fig = plt.figure(figsize=(14, 7))
    try:
        plt.bar(df_provided_month['Date'], df_provided_month[parameter])
        plt.title(f"Monthly report of {parameter} of {month}/{year}")
        plt.xlabel('Dates')
        plt.ylabel(parameter)

        buf = BytesIO()
        fig.savefig(buf, format='png', dpi=300)
    finally:
        # Close the figure even if plotting or saving fails
        plt.close(fig)
    graphs_generated.append(base64.b64encode(buf.getvalue()).decode('utf-8'))

    # Render the result page, passing the encoded image list to the template
    return render_template("generated_graph.html", graphs=graphs_generated)

@app.route('/model-details')
def model_details():
    """Serve the ``model_details.html`` template."""
    return render_template("model_details.html")

@app.route('/jupyter_notebook.html')
def call_notebook():
    """Serve the ``jupyter_notebook.html`` template."""
    return render_template("jupyter_notebook.html")

@app.route('/download/final_cleaned_data.csv')
def download_cleaned_file():
    """Send ``final_cleaned_data.csv`` to the client as an attachment."""
    return send_file("final_cleaned_data.csv", as_attachment=True)

@app.route('/download/weather_AUS.csv')
def download_raw_file():
    """Send ``weatherAUS.csv`` to the client as an attachment.

    Note that the URL spells the name ``weather_AUS.csv`` while the file
    that is served is ``weatherAUS.csv``.
    """
    return send_file("weatherAUS.csv", as_attachment=True)

@app.route('/documentation')
def documentation():
    """Serve the ``documentation.html`` template."""
    return render_template("documentation.html")

@app.route('/group')
def group_details():
    """Serve the ``group.html`` template."""
    return render_template("group.html")

# Start the Flask server on port 5000
def run_app():
    """Run the Flask application on port 5000 (used as the thread target below)."""
    app.run(port=5000)
    
# Run the server on its own thread (presumably so the notebook cell returns
# instead of blocking on app.run — confirm if this is ever run as a script).
thread = Thread(target=run_app)
thread.start()

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
