In [12]:
import io
import os

import lxml
import pandas as pd
from flask import Flask, render_template, request, send_from_directory, redirect, url_for
from sklearn.preprocessing import MinMaxScaler, StandardScaler
# Flask application object; the @app.route decorators in later cells register views on it.
app = Flask(__name__)

In [13]:
# Relative directory into which uploaded files are written
UPLOAD_FOLDER = 'uploads'
app.config.update(UPLOAD_FOLDER=UPLOAD_FOLDER)

# In-memory registry shared by the views: uploaded filename -> HTML rendering of its table
uploaded_tables = dict()

@app.route('/', methods=['GET', 'POST'])
def upload_files():
    """Render the upload page and, on POST, store every submitted CSV file.

    Each file in the ``file`` form field is saved under the configured upload
    folder, parsed with ``pd.read_csv`` and registered in ``uploaded_tables``
    as an HTML table keyed by its (sanitised) file name.

    Returns:
        The rendered ``index.html`` template listing all registered file names.
    """
    if request.method == 'POST':
        upload_dir = app.config['UPLOAD_FOLDER']
        # file.save() does not create missing directories.
        os.makedirs(upload_dir, exist_ok=True)

        for file in request.files.getlist('file'):
            # The file name is client-controlled: keep only its last path
            # component (for both '/' and '\\' separators) so that a name such
            # as '../../x.csv' cannot be written outside the upload folder.
            safe_name = os.path.basename((file.filename or '').replace('\\', '/'))
            if safe_name in ('', '.', '..'):
                continue

            file_path = os.path.join(upload_dir, safe_name)
            file.save(file_path)
            data = pd.read_csv(file_path)
            uploaded_tables[safe_name] = data.to_html()

    return render_template('index.html', uploaded_files=uploaded_tables.keys())

@app.route('/uploads/<filename>')
def display_uploaded(filename):
    """Show the stored HTML table for a previously uploaded file.

    Args:
        filename: Key under which the table was registered in ``uploaded_tables``.

    Returns:
        The rendered ``uploaded.html`` template, or a plain "File not found."
        body with HTTP status 404 when no table is registered under ``filename``.
    """
    table_html = uploaded_tables.get(filename)
    if table_html is None:
        # Report the miss with a proper status code instead of a 200 response.
        return "File not found.", 404
    return render_template('uploaded.html', filename=filename, table=table_html)


In [14]:
@app.route('/uploads/<filename>', methods=['POST'])
def apply_filter_or_normalization(filename):
    """Apply the cleaning/normalisation step named by the ``action`` form field.

    The stored HTML table is parsed back into a DataFrame, transformed, and
    written back to ``uploaded_tables`` as HTML. An unrecognised or missing
    ``action`` leaves the data untransformed (the table is still re-rendered).

    Args:
        filename: Key under which the table was registered in ``uploaded_tables``.

    Returns:
        A redirect to ``display_uploaded`` on success; "File not found." with
        status 404 when the table is unknown; a short error message with
        status 400 when the chosen transformation rejects the data.
    """
    data_html = uploaded_tables.get(filename)
    if data_html is None:
        return "File not found.", 404

    # Built at call time because the helpers are defined after this view.
    transforms = {
        'remove_duplicates': remove_duplicates,
        'remove_missing_values': remove_missing_values,
        'convert_boolean_to_binary': convert_boolean_to_binary,
        'min_max_scaling': min_max_scaling,
        'z_score_normalization': z_score_normalization,
    }
    transform = transforms.get(request.form.get('action'))

    # pandas >= 2.1 deprecates passing literal HTML to read_html; hand it a
    # file-like object instead.
    data = pd.read_html(io.StringIO(data_html), flavor='html5lib')[0]
    # The first column is the index that DataFrame.to_html() wrote out.
    data = data.iloc[:, 1:]

    if transform is not None:
        try:
            data = transform(data)
        except (ValueError, TypeError):
            # Keep the stored table untouched. The message is deliberately
            # generic: exception text may echo uploaded cell values.
            return "Could not apply the requested operation to this table.", 400

    uploaded_tables[filename] = data.to_html()
    return redirect(url_for('display_uploaded', filename=filename))

def convert_boolean_to_binary(df):
    """Return a copy of ``df`` with every boolean cell replaced by 1 or 0.

    Python and NumPy booleans are both converted; all other values are
    returned unchanged.

    Args:
        df: The DataFrame to convert. It is not modified.

    Returns:
        A new DataFrame of the same shape.
    """
    def to_binary(value):
        return int(value) if pd.api.types.is_bool(value) else value

    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and the old
    # name is deprecated. Check the class, not the instance, so a column that
    # happens to be called "map" cannot shadow the method on older versions.
    if hasattr(pd.DataFrame, 'map'):
        return df.map(to_binary)
    return df.applymap(to_binary)

def remove_duplicates(df):
    """Return ``df`` without repeated rows, keeping the first occurrence of each."""
    deduplicated = df.drop_duplicates(keep='first')
    return deduplicated

def remove_missing_values(df):
    """Return ``df`` without any row that contains at least one missing value."""
    complete_rows = df.dropna(axis=0, how='any')
    return complete_rows

def _scale_numeric_columns(df, scaler):
    """Fit ``scaler`` on the numeric/bool columns of ``df`` and return the result.

    Columns of any other dtype (for example text identifiers) are carried over
    unchanged rather than making the scaler raise. The returned frame keeps
    the original column order and, like the previous implementation, has a
    fresh 0..n-1 index. ``df`` itself is not modified. Column labels are
    assumed to be unique.
    """
    result = df.reset_index(drop=True)
    numeric_cols = result.select_dtypes(include=['number', 'bool']).columns
    # Nothing to fit on: no rows or no scalable columns.
    if result.empty or len(numeric_cols) == 0:
        return result

    scaled = scaler.fit_transform(result[numeric_cols])
    for position, column in enumerate(numeric_cols):
        # Replacing the whole column lets integer columns become float.
        result[column] = scaled[:, position]
    return result


def min_max_scaling(df):
    """Rescale numeric columns of ``df`` with ``MinMaxScaler``.

    Non-numeric columns are returned unchanged.
    """
    return _scale_numeric_columns(df, MinMaxScaler())


def z_score_normalization(df):
    """Standardise numeric columns of ``df`` with ``StandardScaler``.

    Non-numeric columns are returned unchanged.
    """
    return _scale_numeric_columns(df, StandardScaler())


In [15]:
# TODO: add a missing-data finder
# TODO: add missing-data filling


In [16]:
if __name__ == '__main__':
    # Defaults are unchanged (listen on all interfaces, port 5000). Setting
    # FLASK_RUN_HOST / FLASK_RUN_PORT overrides them, e.g. FLASK_RUN_HOST=127.0.0.1
    # to keep the development server reachable from this machine only.
    app.run(
        host=os.environ.get('FLASK_RUN_HOST', '0.0.0.0'),
        port=int(os.environ.get('FLASK_RUN_PORT', '5000')),
    )

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://192.168.216.115:5000
Press CTRL+C to quit
127.0.0.1 - - [10/Aug/2023 13:45:45] "GET /uploads/sample_submission.csv HTTP/1.1" 200 -
127.0.0.1 - - [10/Aug/2023 13:45:46] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [10/Aug/2023 13:45:50] "POST / HTTP/1.1" 200 -
127.0.0.1 - - [10/Aug/2023 13:45:50] "GET /static/styles.css HTTP/1.1" 304 -
127.0.0.1 - - [10/Aug/2023 13:45:51] "GET /uploads/sample_submission.csv HTTP/1.1" 200 -
127.0.0.1 - - [10/Aug/2023 13:45:51] "GET /static/styles.css HTTP/1.1" 304 -
[2023-08-10 13:45:55,355] ERROR in app: Exception on /uploads/sample_submission.csv [POST]
Traceback (most recent call last):
  File "c:\Python311\Lib\site-packages\flask\app.py", line 2190, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\site-packages\flask\app.py", line 1486, in full_dispatch_request
    rv = self.handle_user_exc