In [1]:
import pandas as pd
import numpy as np
import pickle as pkl 
from tqdm import tqdm
import os
## Transductive learning (label propagation)
from sklearn.semi_supervised import LabelPropagation

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [3]:
# Default locations, used when the callers do not pass their own.
_LABEL_PROP_MODEL_PATH = "D:/stage_ete_SDC_data/MLModels/Model_label_propagation_warp.pkl"
_LABEL_PROP_DATA_DIR = "D:/stage_ete_SDC_data/data_store_warp/dataframes"


def _propagate_labels(indices, model_path, data_dir):
    """Predict a label for every row of each ``data<i>.csv`` and rewrite the file in place."""
    # NOTE(review): unpickling executes arbitrary code -- only load model files you trust.
    # The context manager closes the file handle, which the bare open() call never did.
    with open(model_path, "rb") as model_file:
        model = pkl.load(model_file)

    for i in tqdm(indices):
        csv_path = os.path.join(data_dir, "data" + str(i) + ".csv")
        df = pd.read_csv(csv_path)

        # Feature selection is driven purely by the column count:
        # 6 columns -> drop the last two, 5 columns -> drop the last one,
        # anything else -> use every column.
        # NOTE(review): presumably the dropped trailing columns are labels written by
        # an earlier run -- confirm against the CSV schema.
        column_count = df.shape[1]
        if column_count == 6:
            features = df.iloc[:, :-2]
        elif column_count == 5:
            features = df.iloc[:, :-1]
        else:
            features = df

        # Adds the 'label' column, or overwrites it when it already exists.
        df["label"] = model.predict(features)
        df.to_csv(csv_path, index=False)


def labelProp(n, model_path=_LABEL_PROP_MODEL_PATH, data_dir=_LABEL_PROP_DATA_DIR):
    """
        Label the dataframes data0.csv ... data<n-1>.csv with the pickled model.

        Each CSV is read, the model's predictions are stored in its
        'label' column and the file is written back to the same path.

        Arguments:
            n: number of dataframes to process, starting from index 0.
            model_path: path of the pickled model (any object exposing
                ``predict``). Defaults to the original hard-coded location.
            data_dir: directory holding the data<i>.csv files. Defaults
                to the original hard-coded location.
    """
    _propagate_labels(range(n), model_path, data_dir)


def labelPropRefined(s, n, model_path=_LABEL_PROP_MODEL_PATH, data_dir=_LABEL_PROP_DATA_DIR):
    """
        Same as labelProp, restricted to the dataframes data<s>.csv ... data<n-1>.csv.

        Arguments:
            s: index of the first dataframe to process (inclusive).
            n: index at which to stop (exclusive).
            model_path: path of the pickled model (any object exposing
                ``predict``). Defaults to the original hard-coded location.
            data_dir: directory holding the data<i>.csv files. Defaults
                to the original hard-coded location.
    """
    _propagate_labels(range(s, n), model_path, data_dir)

In [4]:
def concatAllData(n, data_dir="D:/stage_ete_SDC_data/data_store_warp/dataframes"):
    """
        Read the dataframes data0.csv ... data<n-1>.csv and
        concatenate them, in index order, into a single dataframe
        with a fresh 0..N-1 index.

        Arguments:

            n: number of dataframes to iterate
            data_dir: directory holding the data<i>.csv files.
                Defaults to the original hard-coded location.

        Return:
            dataframe (an empty one when n is 0)
    """
    # Collect first and concatenate once: calling pd.concat inside the loop
    # re-copies the whole accumulated frame on every iteration.
    frames = [
        pd.read_csv(os.path.join(data_dir, "data" + str(i) + ".csv"))
        for i in tqdm(range(n))
    ]
    if not frames:
        # pd.concat refuses an empty list; keep the documented return type.
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

In [5]:
# labelProp(17020)
# labelPropRefined(634,1000)

In [6]:
# df_glob = concatAllData(17020)

In [7]:
# for i in tqdm(range(5500)):
#     df = pd.read_csv("D:/stage_ete_SDC_data/data_store_warp/dataframes/data"+str(i)+".csv")
#     fig = px.scatter(df, x="pixel_number", y="row_number", color="label")
#     fig.write_image("D:/stage_ete_SDC_data/data_store_warp/plots_27_08_2022/plot"+str(i)+".jpg")

In [8]:
# df_glob.head()

In [9]:
# df_glob.shape

In [10]:
# df_glob.drop(["label"], axis=1, inplace=True)

In [11]:
# df_glob.rename(columns={"label_2":"label"}, inplace=True)

In [12]:
# df_glob

In [13]:
# df_glob.to_csv("D:/stage_ete_SDC_data/data_store/global_dataframe.csv", index=False)

## Mean point of each row, for each line (label):

In [30]:
def exportMeanDF(n, inPath, outPath="", outName="new_data", ouType="csv", s=0):
    """
        Export, for each input dataframe, its per-row mean dataframe.

        For every file data<i>.csv (s <= i < n) found in inPath, the rows
        are split by "label"; within each label they are grouped by
        ("frame_id", "mask_id", "row_number") and every remaining column
        (e.g. "pixel_number") is averaged. The per-label results are
        stacked in order of first appearance of the label and written to
        <outPath>/<outName><i>.<ouType>.

        Arguments:
            n: Index at which to stop (exclusive); with the default s=0
                this is the number of files to process.
            inPath: The path of the directory where the data<i>.csv
                files are located.
            outPath: The path where the new files are exported.
                If the path is empty (default), the new files are
                written next to the original ones, in inPath.
            outName: Common name prefix of the new files. By default,
                outName="new_data".
            ouType: Type of the exported files, "csv" or "xlsx"
                (case-insensitive). By default, ouType="csv".
            s: The dataframe index to start iterating from. Default s=0.

        Return:
            None on success. Problems are reported through the return
            value rather than raised: a string of the form
            "<ErrorName>: <details>" describes what went wrong.
    """
    ## n must be convertible to an integer
    try:
        int(n)
    except Exception as e:
        return (f"{type(e).__name__}: {e}")

    ## inPath must be a path, not a number (nor a numeric string)
    try:
        int(inPath)
    except (TypeError, ValueError, OverflowError):
        pass  # expected: a real path cannot be parsed as an integer
    else:
        return "ValueError: expected type str in 'inPath', instead it got number or str(int)."

    try:
        ## Reject unsupported formats up front instead of silently writing nothing
        extension = str(ouType).lower()
        if extension not in ("csv", "xlsx"):
            return f"ValueError: unsupported 'ouType' {ouType!r}; expected 'csv' or 'xlsx'."

        ## Check that inPath is a usable directory
        if not os.path.isdir(inPath):
            return f"NotADirectoryError: 'inPath' is not an existing directory: {inPath!r}"
        if not os.listdir(inPath):
            return f"FileNotFoundError: 'inPath' contains no files: {inPath!r}"

        ## Empty outPath means "same place as the input files"
        target_dir = outPath if outPath else inPath

        group_keys = ["frame_id", "mask_id", "row_number"]
        for i in tqdm(range(s, n)):
            df = pd.read_csv(os.path.join(inPath, "data" + str(i) + ".csv"))

            ## One averaged frame per label, in order of first appearance
            per_label = [
                df.loc[df["label"] == label].groupby(group_keys).mean().reset_index()
                for label in df["label"].unique()
            ]
            df_mean = pd.concat(per_label, ignore_index=True)

            ## The index is part of the name for both formats, so each
            ## iteration gets its own file instead of overwriting one
            out_file = os.path.join(target_dir, outName + str(i) + "." + extension)
            if extension == "csv":
                df_mean.to_csv(out_file, index=False)
            else:
                df_mean.to_excel(out_file, index=False)
    except Exception as e:
        return (f"{type(e).__name__}: {e.args}")

In [31]:
# Export the mean dataframes for data0.csv ... data17019.csv.
exportMeanDF(
    n=17020,
    inPath=r"D:/stage_ete_SDC_data/data_store_warp/dataframes/",
    outPath=r"D:/stage_ete_SDC_data/data_store_warp/mean_dataframes/",
)

100%|████████████████████████████████████████████████████████████████████████████| 17020/17020 [08:14<00:00, 34.43it/s]


In [35]:
# Scatter of the labelled points of dataframe 200 (before averaging).
df = pd.read_csv(
    "D:/stage_ete_SDC_data/data_store_warp/dataframes/data200.csv"
)
fig = px.scatter(
    data_frame=df,
    x="pixel_number",
    y="row_number",
    color="label",
)
fig.show()

In [36]:
# Same scatter for the exported mean dataframe 200.
df = pd.read_csv(
    "D:/stage_ete_SDC_data/data_store_warp/mean_dataframes/new_data200.csv"
)
fig = px.scatter(
    data_frame=df,
    x="pixel_number",
    y="row_number",
    color="label",
)
fig.show()