In [105]:
import os
from io import StringIO

import cx_Oracle
import lxml
import pandas as pd
from flask import Flask, render_template, request, send_from_directory, redirect, url_for, flash
from sklearn.preprocessing import MinMaxScaler, StandardScaler
# Create the Flask application object that every route in this notebook registers on.
app = Flask(__name__)
# Flask signs the session cookie (which flash() relies on) with this key, so it
# should not be a value committed together with the notebook. Read it from the
# FLASK_SECRET_KEY environment variable; the original placeholder is kept only
# as a fallback so local runs keep working without any configuration.
app.secret_key = os.environ.get('FLASK_SECRET_KEY', 'your_secret_key_here')


Connecting to the Oracle database


In [106]:
##dsn_tns = cx_Oracle.makedsn('pc0308', '1521', service_name='XE')
##conn = cx_Oracle.connect(user='ivan', password='password', dsn=dsn_tns)

In [107]:
# Relative directory that receives the files sent through the upload form.
UPLOAD_FOLDER = 'uploads'
app.config.update(UPLOAD_FOLDER=UPLOAD_FOLDER)

# Module-level registry shared by all routes: uploaded file name -> table rendered as HTML.
uploaded_tables = dict()

@app.route('/', methods=['GET', 'POST'])
def upload_files():
    """Show the upload page and, on POST, store every submitted CSV file.

    Each file sent in the ``file`` form field is saved to the configured
    upload folder, parsed with pandas and registered in ``uploaded_tables``
    as an HTML table keyed by its (sanitised) file name. Files that cannot be
    parsed as CSV are reported through ``flash`` and skipped.

    Returns:
        The rendered ``index.html`` template, which receives the names of all
        tables uploaded so far as ``uploaded_files``.
    """
    if request.method == 'POST':
        upload_dir = app.config['UPLOAD_FOLDER']
        # Saving a file does not create missing directories, so make sure the
        # upload folder exists before the first write.
        os.makedirs(upload_dir, exist_ok=True)

        for file in request.files.getlist('file'):
            # The client controls the file name: keep only its last path
            # component so it cannot point outside the upload folder.
            safe_name = os.path.basename(file.filename or '')
            if safe_name in ('', '.', '..'):
                continue

            file_path = os.path.join(upload_dir, safe_name)
            file.save(file_path)
            try:
                data = pd.read_csv(file_path)
            except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError):
                # Not a readable CSV: tell the user instead of failing the request.
                flash(f"'{safe_name}' could not be read as a CSV file.", 'error')
                continue
            uploaded_tables[safe_name] = data.to_html(index=False)

    return render_template('index.html', uploaded_files=uploaded_tables.keys())



@app.route('/uploads/<filename>')
def display_uploaded(filename):
    """Render one uploaded table together with the list of its selectable columns.

    Args:
        filename: Key of the table in ``uploaded_tables`` (taken from the URL).

    Returns:
        The rendered ``uploaded.html`` template, or a plain-text 404 response
        when no table is stored under ``filename``.
    """
    table_html = uploaded_tables.get(filename)
    if table_html is None:
        # Unknown table: answer with a real 404 status instead of a 200 page.
        return "File not found.", 404

    # Tables are stored as HTML, so parse the markup back into a DataFrame.
    # read_html expects a path or file-like object; passing the literal markup
    # is deprecated, hence the StringIO wrapper.
    df = pd.read_html(StringIO(table_html), header=0)[0]

    # Hide the "Unnamed: N" placeholders pandas generates for header-less
    # columns. str() guards against labels that are not strings.
    column_options = [col for col in df.columns if not str(col).startswith("Unnamed")]

    return render_template('uploaded.html', filename=filename, table=table_html, column_options=column_options)




In [108]:
@app.route('/all_tables')
def all_tables():
    """Render ``all_tables.html`` with every table currently held in ``uploaded_tables``."""
    template_context = {'uploaded_tables': uploaded_tables}
    return render_template('all_tables.html', **template_context)


Merging

In [109]:
@app.route('/uploads/<filename>/merge', methods=['POST'])
def merge_tables(filename):
    """Outer-join another uploaded table into ``filename`` on a shared column.

    Form fields:
        table_to_merge: Key of the second table in ``uploaded_tables``.
        attribute: Name of the column to join on; must exist in both tables.

    Args:
        filename: Key of the table that receives the merge result.

    Returns:
        A redirect to the display page of ``filename``. Success and every
        failure (missing form field, unknown table, missing or incompatible
        join column) are reported through ``flash``.
    """
    table_to_merge = request.form.get('table_to_merge')
    attribute = request.form.get('attribute')

    if not (table_to_merge and attribute):
        flash("Please select a table and attribute to merge.", 'error')
    elif filename not in uploaded_tables or table_to_merge not in uploaded_tables:
        # Either key may be stale or hand-crafted; avoid a KeyError / HTTP 500.
        flash("Error accessing the DataFrame.", 'error')
    else:
        # read_html expects a file-like object; literal markup is deprecated.
        df1 = pd.read_html(StringIO(uploaded_tables[filename]), header=0)[0]
        df2 = pd.read_html(StringIO(uploaded_tables[table_to_merge]), header=0)[0]

        if attribute not in df1.columns or attribute not in df2.columns:
            flash(f"Column '{attribute}' not found in both tables.", 'error')
        else:
            try:
                merged_df = pd.merge(df1, df2, on=attribute, how='outer')
            except ValueError as exc:
                # Raised e.g. when the key columns have incompatible dtypes.
                flash(f"Tables could not be merged on '{attribute}': {exc}", 'error')
            else:
                uploaded_tables[filename] = merged_df.to_html(index=False)
                flash(f"Tables merged on '{attribute}'", 'success')

    return redirect(url_for('display_uploaded', filename=filename))


Option to remove unwanted columns

In [110]:
@app.route('/uploads/<filename>/remove_column', methods=['POST'])
def remove_column(filename):
    """Drop the column chosen in the form from an uploaded table.

    Form fields:
        selected_column: Name of the column to remove.

    Args:
        filename: Key of the table in ``uploaded_tables``.

    Returns:
        A redirect to the display page of ``filename``; the outcome is
        reported through ``flash``.
    """
    selected_column = request.form.get('selected_column')
    table_html = uploaded_tables.get(filename)

    if not table_html:
        flash("Error accessing the DataFrame.", 'error')
    elif not selected_column:
        flash("Please select a column to remove.", 'error')
    else:
        # read_html expects a file-like object; literal markup is deprecated.
        df = pd.read_html(StringIO(table_html), header=0)[0]
        try:
            trimmed = df.drop(columns=[selected_column])
        except KeyError:
            flash(f"Column '{selected_column}' not found in the DataFrame.", 'error')
        else:
            # index=False matches every other writer of uploaded_tables; without
            # it the row index is stored as an extra column that comes back as
            # "Unnamed: 0" the next time the table is parsed.
            uploaded_tables[filename] = trimmed.to_html(index=False)
            flash(f"Column '{selected_column}' has been removed from the DataFrame.", 'success')

    return redirect(url_for('display_uploaded', filename=filename))


Functions to perform on a dataset

In [111]:
@app.route('/uploads/<filename>', methods=['POST'])
def apply_action(filename):
    """Apply the cleaning or scaling action chosen in the form to an uploaded table.

    Form fields:
        action: One of ``remove_duplicates``, ``remove_missing_values``,
            ``convert_boolean_to_binary``, ``min_max_scaling`` or
            ``z_score_normalization``.

    Args:
        filename: Key of the table in ``uploaded_tables``.

    Returns:
        A redirect to the display page of ``filename``, or a plain-text 404
        response when no table is stored under ``filename``. An unknown action
        or an action that fails is reported through ``flash`` and leaves the
        stored table untouched.
    """
    table_html = uploaded_tables.get(filename)
    if table_html is None:
        # Unknown table: answer with a real 404 status instead of a 200 page.
        return "File not found.", 404

    # Map each form value to the function implementing it.
    actions = {
        'remove_duplicates': remove_duplicates,
        'remove_missing_values': remove_missing_values,
        'convert_boolean_to_binary': convert_boolean_to_binary,
        'min_max_scaling': min_max_scaling,
        'z_score_normalization': z_score_normalization,
    }
    handler = actions.get(request.form.get('action'))

    if handler is None:
        flash("Please select a valid action.", 'error')
    else:
        try:
            # The html5lib parser is kept from the original implementation.
            # read_html expects a file-like object; literal markup is deprecated.
            data = pd.read_html(StringIO(table_html), flavor='html5lib')[0]
            data = handler(data)
        except (ValueError, TypeError) as exc:
            # E.g. scaling data that cannot be converted to numbers.
            flash(f"Action could not be applied: {exc}", 'error')
        else:
            uploaded_tables[filename] = data.to_html(index=False)

    return redirect(url_for('display_uploaded', filename=filename))

def convert_boolean_to_binary(df):
    """Return a copy of ``df`` in which every boolean value is replaced by 0 or 1.

    The conversion is element-wise, so booleans inside mixed-type columns are
    converted as well; all other values are passed through unchanged.

    Args:
        df: DataFrame to convert; it is not modified.

    Returns:
        A new DataFrame with the same shape, columns and index as ``df``.
    """
    def _to_binary(value):
        return int(value) if isinstance(value, bool) else value

    # DataFrame.applymap is deprecated; mapping each column separately is the
    # equivalent element-wise operation and works on old and new pandas alike.
    return df.apply(lambda column: column.map(_to_binary))

def remove_duplicates(df):
    """Return ``df`` without repeated rows, keeping the first occurrence of each."""
    is_repeat = df.duplicated()
    return df[~is_repeat]

def remove_missing_values(df):
    """Return only the rows of ``df`` that contain no missing value in any column."""
    row_is_complete = df.notna().all(axis=1)
    return df[row_is_complete]

def min_max_scaling(df):
    """Rescale every numeric column of ``df`` with scikit-learn's ``MinMaxScaler``.

    The scaler is used with its default settings (feature range [0, 1]).
    Non-numeric columns are returned unchanged instead of making the whole
    operation fail, and the original index is preserved.

    Args:
        df: DataFrame to scale; it is not modified.

    Returns:
        A new DataFrame with the same columns and index as ``df``.
    """
    numeric_columns = df.select_dtypes(include='number').columns
    scaled = df.copy()
    # The scaler rejects text columns and empty input, so hand it only the
    # numeric part and skip it entirely when there is nothing to fit.
    if len(numeric_columns) > 0 and len(df) > 0:
        scaled[numeric_columns] = MinMaxScaler().fit_transform(df[numeric_columns])
    return scaled

def z_score_normalization(df):
    """Standardise every numeric column of ``df`` with scikit-learn's ``StandardScaler``.

    The scaler is used with its default settings (centre on the mean, scale to
    unit variance). Non-numeric columns are returned unchanged instead of
    making the whole operation fail, and the original index is preserved.

    Args:
        df: DataFrame to normalise; it is not modified.

    Returns:
        A new DataFrame with the same columns and index as ``df``.
    """
    numeric_columns = df.select_dtypes(include='number').columns
    normalized = df.copy()
    # The scaler rejects text columns and empty input, so hand it only the
    # numeric part and skip it entirely when there is nothing to fit.
    if len(numeric_columns) > 0 and len(df) > 0:
        normalized[numeric_columns] = StandardScaler().fit_transform(df[numeric_columns])
    return normalized


In [112]:
# add missing data finder
# add missing data filling


In [113]:
# Start Flask's built-in server only when this code runs as the main program.
if __name__ == '__main__':
    # 0.0.0.0 makes the server listen on every network interface.
    server_options = {'host': '0.0.0.0', 'port': 5000}
    app.run(**server_options)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://192.168.216.115:5000
Press CTRL+C to quit
127.0.0.1 - - [21/Aug/2023 14:45:43] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [21/Aug/2023 14:45:43] "GET /static/styles.css HTTP/1.1" 304 -
127.0.0.1 - - [21/Aug/2023 14:45:47] "POST / HTTP/1.1" 200 -
127.0.0.1 - - [21/Aug/2023 14:45:47] "GET /static/styles.css HTTP/1.1" 304 -
127.0.0.1 - - [21/Aug/2023 14:45:48] "GET /uploads/sample_submission.csv HTTP/1.1" 200 -
127.0.0.1 - - [21/Aug/2023 14:45:48] "GET /static/styles.css HTTP/1.1" 304 -
127.0.0.1 - - [21/Aug/2023 14:45:49] "POST /uploads/sample_submission.csv HTTP/1.1" 302 -
127.0.0.1 - - [21/Aug/2023 14:45:49] "GET /uploads/sample_submission.csv HTTP/1.1" 200 -
127.0.0.1 - - [21/Aug/2023 14:45:49] "GET /static/styles.css HTTP/1.1" 304 -
127.0.0.1 - - [21/Aug/2023 14:45:57] "POST /uploads/sample_submission.csv HTTP/1.1" 302 -
127.0.0.1 - - [21/Aug/2023 14:45:57] "GET /uploads/sample_submission.csv HTT