In [1]:
pip install flask


Note: you may need to restart the kernel to use updated packages.


In [None]:
from flask import Flask, render_template, request, session, redirect, url_for, jsonify
from werkzeug.utils import secure_filename
from werkzeug.security import generate_password_hash, check_password_hash
import os
import sqlite3
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns

app = Flask(__name__)
# Flask signs the session cookie with this key, so a value committed to source
# control lets anyone forge a logged-in session. Prefer the FLASK_SECRET_KEY
# environment variable; the placeholder remains only as a development fallback.
app.secret_key = os.environ.get('FLASK_SECRET_KEY', 'your_secret_key')

# Folder (relative to the working directory) where uploaded CSV files are saved.
UPLOAD_FOLDER = 'csv_files'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# Ensure the upload directory exists
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

def create_connection():
    """Open a connection to the ``users.db`` SQLite database.

    Returns:
        The open ``sqlite3.Connection``, or ``None`` when connecting fails
        (the SQLite error is printed rather than raised).
    """
    try:
        return sqlite3.connect('users.db')
    except sqlite3.Error as err:
        print(err)
        return None

def create_table(conn):
    """Create the ``users`` table on *conn* unless it already exists.

    Args:
        conn: An open ``sqlite3`` connection.

    SQLite errors are printed, not raised, so the caller never sees them.
    """
    ddl = """
        CREATE TABLE IF NOT EXISTS users (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            username TEXT NOT NULL UNIQUE,
            email TEXT NOT NULL UNIQUE,
            password TEXT NOT NULL
        );
    """
    try:
        conn.cursor().execute(ddl)
    except sqlite3.Error as err:
        print(err)

@app.route('/')
def home():
    """Send visitors of the site root to the signup page."""
    signup_url = url_for('signup')
    return redirect(signup_url)

@app.route('/signup', methods=['GET', 'POST'])
def signup():
    """Render the signup form (GET) or register a new user (POST).

    On POST the form fields ``username``, ``email`` and ``password`` are read,
    the password is hashed, and a row is inserted into ``users``.

    Returns:
        * GET: the ``signup.html`` form.
        * POST, success: a redirect to the upload page, with the username
          stored in the session.
        * POST, duplicate username/email: ``signup.html`` with an error.
        * POST, database failure: ``error.html`` with a message.
    """
    if request.method != 'POST':
        return render_template('signup.html')

    username = request.form['username']
    email = request.form['email']
    password = request.form['password']

    conn = create_connection()
    if conn is None:
        return render_template('error.html', message='Failed to connect to database.')

    try:
        if user_exists(username, conn):
            return render_template('signup.html', error='Username already exists. Please choose a different username.')

        cur = conn.cursor()
        cur.execute(
            "INSERT INTO users (username, email, password) VALUES (?, ?, ?)",
            (username, email, generate_password_hash(password)),
        )
        conn.commit()
    except sqlite3.IntegrityError:
        # The pre-check above only looks at the username. A duplicate email, or
        # a concurrent signup that wins the race, violates a UNIQUE constraint;
        # that is a user-correctable problem, so show the form again instead of
        # the generic error page.
        return render_template('signup.html', error='Username or email already exists. Please use different details.')
    except sqlite3.Error as e:
        print(e)
        return render_template('error.html', message='Failed to sign up.')
    finally:
        conn.close()

    session['username'] = username
    return redirect(url_for('upload_file'))  # Redirect to upload page after successful signup

def user_exists(username, conn):
    """Report whether the ``users`` table has a row with this username.

    Args:
        username: The username to look up (bound as a query parameter).
        conn: An open ``sqlite3`` connection.

    Returns:
        ``True`` if at least one matching row is found, otherwise ``False``.
    """
    lookup = conn.cursor().execute(
        "SELECT * FROM users WHERE username = ?", (username,)
    )
    match = lookup.fetchone()
    return match is not None

@app.route('/login', methods=['GET', 'POST'])
def login():
    """Render the login form (GET) or authenticate a user (POST).

    On POST the ``username`` and ``password`` form fields are checked against
    the ``users`` table; the stored hash is read from the fourth column of the
    matching row. A successful login stores the username in the session and
    redirects to the upload page.
    """
    if request.method != 'POST':
        return render_template('login.html')

    username = request.form['username']
    password = request.form['password']

    conn = create_connection()
    if conn is None:
        return render_template('error.html', message='Failed to connect to database.')

    try:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM users WHERE username = ?", (username,))
        row = cursor.fetchone()

        if not row:
            return render_template('login.html', error='User is not registered.')
        if not check_password_hash(row[3], password):
            return render_template('login.html', error='Invalid username or password.')

        session['username'] = username
        return redirect(url_for('upload_file'))  # Redirect to upload page after successful login
    except sqlite3.Error as db_err:
        print(db_err)
        return render_template('error.html', message='Failed to login.')
    finally:
        # Runs on every exit path above, including the early returns.
        conn.close()
    
# Registered as a route so the view is reachable and url_for('logout') resolves;
# without the decorator this function could never be invoked by a request.
@app.route('/logout')
def logout():
    """Remove the logged-in username from the session and go to the login page."""
    # pop() with a default keeps this safe when nobody is logged in.
    session.pop('username', None)
    return redirect(url_for('login'))

def generate_pie_chart(labels, values):
    """Build a Plotly pie chart and return it as an embeddable HTML fragment.

    Args:
        labels: Slice labels, passed straight to ``go.Pie``.
        values: Slice sizes, passed straight to ``go.Pie``.

    Returns:
        The figure rendered with ``to_html(full_html=False)``.
    """
    # This module imports only plotly.express (as ``px``), so ``go`` is bound
    # here; without it the function fails with NameError on first use.
    import plotly.graph_objects as go

    fig = go.Figure(
        data=[go.Pie(labels=labels, values=values)],
        layout=go.Layout(title='Pie Chart'),
    )
    return fig.to_html(full_html=False)

def generate_scatter_plot(x_data, y_data, x_label, y_label):
    """Build a Plotly marker-only scatter plot as an embeddable HTML fragment.

    Args:
        x_data: Values for the x axis.
        y_data: Values for the y axis.
        x_label: Title shown on the x axis.
        y_label: Title shown on the y axis.

    Returns:
        The figure rendered with ``to_html(full_html=False)``.
    """
    # This module imports only plotly.express (as ``px``), so ``go`` is bound
    # here; without it the function fails with NameError on first use.
    import plotly.graph_objects as go

    layout = go.Layout(
        title='Scatter Plot',
        xaxis=dict(title=x_label),
        yaxis=dict(title=y_label),
    )
    points = go.Scatter(x=x_data, y=y_data, mode='markers')
    fig = go.Figure(data=[points], layout=layout)
    return fig.to_html(full_html=False)

def generate_bar_chart(df, categorical_column):
    """Render a styled bar chart for one column as an HTML fragment.

    Only the x column is handed to ``px.bar``; the y axis is titled 'Count'.

    Args:
        df: DataFrame holding the data.
        categorical_column: Name of the column placed on the x axis.

    Returns:
        The figure rendered with ``to_html(full_html=False)``.
    """
    chart_title = f'Bar Chart - {categorical_column}'
    fig = px.bar(
        df,
        x=categorical_column,
        title=chart_title,
        labels={categorical_column: categorical_column},
        color_discrete_sequence=['#1f77b4'],
    )

    # Dark outline around each bar, slightly transparent fill.
    bar_style = dict(
        marker_line_color='rgb(8,48,107)',
        marker_line_width=1.5,
        opacity=0.8,
    )
    fig.update_traces(**bar_style)

    page_style = dict(
        xaxis_title=categorical_column,
        yaxis_title='Count',
        font=dict(size=14, color='black'),
        title_font=dict(size=18, color='black'),
        plot_bgcolor='white',
        showlegend=False,
        margin=dict(l=40, r=40, t=50, b=40),
    )
    fig.update_layout(**page_style)

    return fig.to_html(full_html=False)

def generate_histogram(df, numerical_column):
    """Render a styled histogram of one column as an HTML fragment.

    Args:
        df: DataFrame holding the data.
        numerical_column: Name of the column whose distribution is plotted.

    Returns:
        The figure rendered with ``to_html(full_html=False)``.
    """
    chart_title = f'Histogram - {numerical_column}'
    fig = px.histogram(
        df,
        x=numerical_column,
        title=chart_title,
        labels={numerical_column: numerical_column},
        color_discrete_sequence=['#1f77b4'],
    )

    # Dark outline around each bin, slightly transparent fill.
    bin_style = dict(
        marker_line_color='rgb(8,48,107)',
        marker_line_width=1.5,
        opacity=0.8,
    )
    fig.update_traces(**bin_style)

    page_style = dict(
        xaxis_title=numerical_column,
        yaxis_title='Frequency',
        font=dict(size=14, color='black'),
        title_font=dict(size=18, color='black'),
        plot_bgcolor='white',
        showlegend=False,
        margin=dict(l=40, r=40, t=50, b=40),
    )
    fig.update_layout(**page_style)

    return fig.to_html(full_html=False)

@app.route('/upload', methods=['GET', 'POST'])
def upload_file():
    """Show the upload form (GET) or accept a CSV upload (POST).

    Requires a logged-in session; otherwise redirects to the login page.
    On a valid upload the file is saved under the configured upload folder,
    its path is remembered in ``session['csv_file']``, and the parsed contents
    are rendered as an HTML table on the upload page.
    """
    if 'username' not in session:
        return redirect(url_for('login'))

    if request.method != 'POST':
        return render_template('upload.html')

    if 'file' not in request.files:
        return render_template('upload.html', error='No file part')

    file = request.files['file']

    if file.filename == '':
        return render_template('upload.html', error='No selected file')

    # Compare the extension case-insensitively so "DATA.CSV" is accepted, and
    # reject names that sanitize to nothing: joining an empty name would point
    # at the upload directory itself and make the save fail.
    safe_name = secure_filename(file.filename)
    if not safe_name or not file.filename.lower().endswith('.csv'):
        return render_template('upload.html', error='Invalid file type. Please upload a CSV file.')

    filename = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
    file.save(filename)

    try:
        df = pd.read_csv(filename)
    except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError):
        # A ".csv" name does not guarantee CSV content; report it to the user
        # instead of letting the request end in a server error.
        return render_template('upload.html', error='The uploaded file could not be read as CSV.')

    # Remember the file only once it is known to parse, so the other pages
    # never try to load an unreadable upload.
    session['csv_file'] = filename
    data_html = df.to_html(classes='table table-striped', index=False)

    return render_template('upload.html', success='File uploaded successfully.', data_html=data_html)

@app.route('/visualization', methods=['GET', 'POST'])
def visualize_data():
    """Render the visualization page, optionally with a chart.

    Requires a logged-in session and a previously uploaded CSV. On POST the
    ``type`` form field selects the chart ('pie', 'scatter', 'bar' or
    'histogram') and further form fields name the columns to plot. Any other
    ``type`` value leaves the page without a chart.
    """
    if 'username' not in session:
        return redirect(url_for('login'))

    if 'csv_file' not in session:
        return render_template('visualization.html', error='No CSV file uploaded. Please upload a CSV file first.')

    df = pd.read_csv(session['csv_file'])
    columns = df.columns.tolist()
    chart = None

    if request.method == 'POST':
        form = request.form
        vis_type = form['type']

        if vis_type == 'pie':
            # Read both form fields before touching the frame.
            label_name = form['category_column']
            value_name = form['value_column']
            chart = generate_pie_chart(df[label_name], df[value_name])
        elif vis_type == 'scatter':
            x_name = form['x_column']
            y_name = form['y_column']
            # Column names double as the axis labels.
            chart = generate_scatter_plot(df[x_name], df[y_name], x_name, y_name)
        elif vis_type == 'bar':
            chart = generate_bar_chart(df, form['categorical_column'])
        elif vis_type == 'histogram':
            chart = generate_histogram(df, form['numerical_column'])

    return render_template('visualization.html', chart=chart, columns=columns)

@app.route('/data', methods=['GET', 'POST'])
def data():
    """Render the data page, optionally with a pivot table or grouped summary.

    Requires a logged-in session and a previously uploaded CSV. On POST:

    * ``type == 'Pivot'``: builds ``pd.pivot_table`` from the ``index_column``,
      ``category_column`` and ``value_column`` form fields.
    * ``type == 'Groupby'``: groups by ``index_column`` and aggregates
      ``value_column``.

    ``aggregation_function`` must be 'sum', 'mean' or 'count'; anything else
    re-renders the page with an error message. Any other ``type`` value renders
    the plain page.
    """
    if 'username' not in session:
        return redirect(url_for('login'))

    if 'csv_file' not in session:
        return render_template('data.html', error='No CSV file uploaded. Please upload a CSV file first.')

    df = pd.read_csv(session['csv_file'])
    columns = df.columns.tolist()

    if request.method == 'POST':
        data_type = request.form['type']
        aggregation_function = request.form['aggregation_function']

        if data_type in ('Pivot', 'Groupby'):
            # Validate up front: an unknown value would otherwise leave the
            # result variable unassigned and end in UnboundLocalError.
            if aggregation_function not in ('sum', 'mean', 'count'):
                return render_template('data.html', error='Unsupported aggregation function.', columns=columns)

            index_column = request.form['index_column']
            value_column = request.form['value_column']

            if data_type == 'Pivot':
                category_column = request.form['category_column']
                pivot = pd.pivot_table(
                    df,
                    values=value_column,
                    index=index_column,
                    columns=category_column,
                    aggfunc=aggregation_function,
                )
                pivot_html = pivot.to_html(classes='table table-striped')
                return render_template('data.html', pivot_html=pivot_html, columns=columns)

            # Groupby needs no category column, so it is not read from the form.
            grouped_df = df.groupby(index_column).agg({value_column: aggregation_function}).reset_index()
            grouped_html = grouped_df.to_html(classes='table table-striped', index=False)
            return render_template('data.html', grouped_html=grouped_html, columns=columns)

    return render_template('data.html', columns=columns)

@app.route('/metrics', methods=['GET', 'POST'])
def metrics():
    """Show the metrics page; on POST report MAE/MSE for two CSV columns.

    Requires a logged-in session and a previously uploaded CSV. The form
    fields ``y_true`` and ``y_pred`` name the actual and predicted columns.
    Rows are split 60/40 into train and test subsets with a fixed seed, and
    the mean absolute error and mean squared error are computed per subset.

    Returns:
        ``metrics.html`` with the four metrics, or with ``error`` set when a
        field or column is missing or a column is not numeric; a 400 JSON
        response when no CSV has been uploaded.
    """
    if 'username' not in session:
        return redirect(url_for('login'))

    if 'csv_file' not in session:
        return jsonify({'error': 'No CSV file uploaded. Please upload a CSV file first.'}), 400

    # Loaded for GET as well so the template gets the column list on every
    # render, as the other data pages do.
    df = pd.read_csv(session['csv_file'])
    columns = df.columns.tolist()

    if request.method != 'POST':
        return render_template('metrics.html', columns=columns)

    if 'y_true' not in request.form or 'y_pred' not in request.form:
        return render_template('metrics.html', error='Both "y_true" and "y_pred" fields are required.', columns=columns)

    y_true = request.form['y_true']
    y_pred = request.form['y_pred']

    try:
        residuals = pd.to_numeric(df[y_true]) - pd.to_numeric(df[y_pred])
    except KeyError as e:
        return render_template('metrics.html', error=f'Column "{str(e)}" not found in the CSV file.', columns=columns)
    except (TypeError, ValueError):
        return render_template('metrics.html', error='Both selected columns must be numeric.', columns=columns)

    # Computed with pandas because this module imports no scikit-learn names.
    # The fixed seed keeps the 40% test sample identical between requests.
    test_residuals = residuals.sample(frac=0.4, random_state=42)
    train_residuals = residuals.drop(test_residuals.index)

    mae_train = train_residuals.abs().mean()
    mse_train = (train_residuals ** 2).mean()
    mae_test = test_residuals.abs().mean()
    mse_test = (test_residuals ** 2).mean()

    return render_template('metrics.html', mae_train=mae_train, mse_train=mse_train, mae_test=mae_test, mse_test=mse_test, columns=columns)

def _to_numeric_or_keep(column):
    """Return *column* converted to numeric, or unchanged if it cannot be."""
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


def create_interactive_heatmap(df):
    """Render a correlation heatmap of the numeric columns as an HTML fragment.

    Each column is converted to numeric where possible, non-numeric columns
    are dropped, and the pairwise correlation matrix of what remains is drawn
    with ``px.imshow``.

    Args:
        df: DataFrame to analyse; it is not modified.

    Returns:
        The figure rendered with ``to_html(full_html=False)``.
    """
    # pd.to_numeric(errors='ignore') is deprecated; the helper gives the same
    # "convert if possible, otherwise leave as is" result explicitly.
    df_numeric = df.apply(_to_numeric_or_keep)
    df_numeric = df_numeric.select_dtypes(include='number')

    correlation_matrix = df_numeric.corr()
    fig = px.imshow(correlation_matrix, color_continuous_scale='RdBu', title='Heatmap',
                    labels=dict(color='Correlation'), width=800, height=600)
    fig.update_layout(
        xaxis=dict(ticks="outside", tickangle=45),
        yaxis=dict(ticks="outside")
    )
    fig.update_traces(hoverinfo="all", hovertemplate="Column %{x}<br>Column %{y}<br>Correlation: %{z}<extra></extra>")
    return fig.to_html(full_html=False)

@app.route('/heatmap', methods=['GET'])
def heatmap():
    """Render the heatmap page for the CSV stored in the session.

    Redirects to the login page when nobody is logged in, and shows an error
    on the page when no CSV has been uploaded yet.
    """
    if 'username' not in session:
        return redirect(url_for('login'))

    if 'csv_file' in session:
        frame = pd.read_csv(session['csv_file'])
        return render_template('heatmap.html', plot_html=create_interactive_heatmap(frame))

    return render_template('heatmap.html', error='No CSV file uploaded. Please upload a CSV file first.')

def summarize_data(df):
    """Compute basic descriptive statistics for every numeric column.

    Args:
        df: DataFrame to summarise.

    Returns:
        A dict keyed by column name; each value is a dict with the keys
        'mean', 'median', 'min', 'max' and 'std_dev'. Non-numeric columns
        are left out.
    """
    def describe(series):
        return {
            'mean': series.mean(),
            'median': series.median(),
            'min': series.min(),
            'max': series.max(),
            'std_dev': series.std(),
        }

    numeric_names = df.select_dtypes(include='number').columns.tolist()
    return {name: describe(df[name]) for name in numeric_names}

@app.route('/summary', methods=['GET'])
def summary():
    """Render per-column summary statistics for the CSV stored in the session.

    Redirects to the login page when nobody is logged in, and answers with a
    400 JSON error when no CSV has been uploaded yet.
    """
    if 'username' not in session:
        return redirect(url_for('login'))

    if 'csv_file' in session:
        stats = summarize_data(pd.read_csv(session['csv_file']))
        return render_template('summary.html', summary=stats)

    return jsonify({'error': 'No CSV file uploaded. Please upload a CSV file first.'}), 400

def calculate_all_correlations(df, column_x, column_y):
    """Compute Pearson, Spearman and Kendall correlations between two columns.

    Both columns are coerced to numeric (unparseable values become NaN) and
    rows where either value is missing are dropped before correlating.

    Args:
        df: DataFrame containing both columns; it is not modified.
        column_x: Name of the first column.
        column_y: Name of the second column.

    Returns:
        A ``(pearson, spearman, kendall)`` tuple of correlation coefficients.

    Raises:
        ValueError: If either column name is not present in *df*.
    """
    if column_x not in df.columns or column_y not in df.columns:
        raise ValueError("One or both specified columns not found in the DataFrame.")

    # Build a separate two-column frame rather than assigning the coerced
    # values back into ``df``, which would overwrite the caller's data.
    pairs = pd.DataFrame({
        'x': pd.to_numeric(df[column_x], errors='coerce'),
        'y': pd.to_numeric(df[column_y], errors='coerce'),
    }).dropna()

    pearson_corr = pairs['x'].corr(pairs['y'], method='pearson')
    spearman_corr = pairs['x'].corr(pairs['y'], method='spearman')
    kendall_corr = pairs['x'].corr(pairs['y'], method='kendall')

    return pearson_corr, spearman_corr, kendall_corr

@app.route('/correlation', methods=['GET', 'POST'])
def correlation():
    """Render the correlation page; on POST show coefficients for two columns.

    Requires a logged-in session and a previously uploaded CSV. The form
    fields ``column_x`` and ``column_y`` name the columns to correlate.

    Returns:
        ``correlation.html`` with the column list, plus either the three
        coefficients or an error message; a 400 JSON response when no CSV has
        been uploaded.
    """
    if 'username' not in session:
        return redirect(url_for('login'))

    if 'csv_file' not in session:
        return jsonify({'error': 'No CSV file uploaded. Please upload a CSV file first.'}), 400

    df = pd.read_csv(session['csv_file'])
    columns = df.columns.tolist()

    if request.method == 'POST':
        column_x = request.form['column_x']
        column_y = request.form['column_y']

        try:
            pearson_corr, spearman_corr, kendall_corr = calculate_all_correlations(df, column_x, column_y)
        except ValueError as e:
            return render_template('correlation.html', error=str(e), columns=columns)

        # columns is passed here too, matching every other render of this
        # template, so the page still has the column list after a result.
        return render_template('correlation.html', pearson_corr=pearson_corr, spearman_corr=spearman_corr, kendall_corr=kendall_corr, column_x=column_x, column_y=column_y, columns=columns)

    return render_template('correlation.html', columns=columns)

if __name__ == '__main__':
    # Nothing else in this module calls create_table(), so make sure the users
    # table exists before the signup and login routes query it. The statement
    # is CREATE TABLE IF NOT EXISTS, so an existing database is left untouched.
    startup_conn = create_connection()
    if startup_conn is not None:
        try:
            create_table(startup_conn)
        finally:
            startup_conn.close()

    app.run()


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [21/May/2024 11:40:36] "GET / HTTP/1.1" 302 -
127.0.0.1 - - [21/May/2024 11:40:37] "GET /signup HTTP/1.1" 200 -
127.0.0.1 - - [21/May/2024 11:40:37] "GET /static/EY.png HTTP/1.1" 304 -
127.0.0.1 - - [21/May/2024 11:40:37] "GET /static/upload.css HTTP/1.1" 304 -
127.0.0.1 - - [21/May/2024 11:41:09] "POST /signup HTTP/1.1" 302 -
127.0.0.1 - - [21/May/2024 11:41:09] "GET /upload HTTP/1.1" 200 -
127.0.0.1 - - [21/May/2024 11:41:09] "GET /static/EY.png HTTP/1.1" 304 -
127.0.0.1 - - [21/May/2024 11:41:09] "GET /static/upload.css HTTP/1.1" 304 -
127.0.0.1 - - [21/May/2024 11:41:14] "GET /login HTTP/1.1" 200 -
127.0.0.1 - - [21/May/2024 11:41:14] "GET /static/upload.css HTTP/1.1" 304 -
127.0.0.1 - - [21/May/2024 11:41:14] "GET /static/EY.png HTTP/1.1" 304 -
127.0.0.1 - - [21/May/2024 11:41:33] "POST /login HTTP/1.1" 302 -
127.0.0.1 - - [21/May/2024 11:41:33] "GET /upload HTTP/1.1" 200 -
127.0.0.1 - - [21/May/2024 11:41:33] 