# Data Processing

### Importing required tools 

In [1]:
from os import path, listdir
from IPython.display import display as disp
import numpy as np 
import pandas as pd
import cv2 as cv
import matplotlib.pyplot as plt

### Data Processing Steps 

1. Raw Data Collection 
2. Raw Data Correction
3. Intermediate Data : Box information(Bx,By,Bh,Bw)
4. Classes Encoding(sign : 1, others : 0 ||  amount : 1, others : 0)
5. Data Orientation (4*(1+4+4) = 36, +1 = 37 columns)
6. Data Normalisation

In [2]:
# # Raw Data Extraction tool 
# from collection_and_process.dataCollector import extract_data_in_loop

### Raw Data Correction

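Each annotated box has four corners A, B, C, D. The coordinate pairs below should be equal for an axis-aligned rectangle, so each pair is replaced by its average:

|X|Y|
|------|------|
|Ax=Dx | Ay=By|
|Bx=Cx | Cy=Dy|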

In [21]:
# Read every CSV file in the raw-data folder and stack them into one frame.
raw_data_path = "./csv/"
# listdir() returns entries in arbitrary order and also lists non-CSV entries
# (e.g. a .ipynb_checkpoints folder); filter and sort so read_csv only sees
# CSV files and the concatenated row order is identical on every run.
csv_files = sorted(file for file in listdir(raw_data_path) if file.lower().endswith(".csv"))
df_list = [pd.read_csv(path.join(raw_data_path, file)) for file in csv_files]
# ignore_index=True discards each file's own row index and renumbers 0..n-1.
raw_df = pd.concat(df_list, ignore_index=True)
raw_df.head(10)

Unnamed: 0,FileName,IMG_HEIGHT,IMG_WIDTH,ClassName,P,Ax,Ay,Bx,By,Cx,Cy,Dx,Dy
0,X_000.jpeg,930,2240,Signature,1,1290.0,648.0,2030.0,660.0,2042.0,840.0,1302.0,849.0
1,X_000.jpeg,930,2240,Date,1,1433.0,227.0,1755.0,226.0,1755.0,294.0,1426.0,294.0
2,X_000.jpeg,930,2240,Amount,1,1785.0,339.0,2072.0,338.0,2074.0,408.0,1783.0,408.0
3,X_000.jpeg,930,2240,AcNo,0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
4,X_001.jpeg,853,2240,Signature,1,1477.0,529.0,2149.0,534.0,2163.0,771.0,1486.0,768.0
5,X_001.jpeg,853,2240,Date,1,1757.0,183.0,1955.0,182.0,1955.0,229.0,1757.0,234.0
6,X_001.jpeg,853,2240,Amount,1,1946.0,400.0,2177.0,404.0,2182.0,452.0,1946.0,453.0
7,X_001.jpeg,853,2240,AcNo,0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
8,X_002.jpeg,823,2240,Signature,1,1444.0,511.0,1892.0,512.0,1895.0,674.0,1433.0,677.0
9,X_002.jpeg,823,2240,Date,1,1335.0,210.0,1825.0,210.0,1822.0,262.0,1332.0,268.0


In [33]:
# Correct the raw corner annotations by averaging the coordinate pairs that
# should coincide: Ax with Dx, Ay with By, Bx with Cx, Cy with Dy.
# Only two corners (A and C) are kept afterwards.
# .assign() returns a new frame, so the new columns are never written onto
# the iloc slice of raw_df (which would raise SettingWithCopyWarning).
corrected_df = raw_df.iloc[:, :5].assign(
    Ax=(raw_df.Ax + raw_df.Dx) / 2,
    Ay=(raw_df.By + raw_df.Ay) / 2,
    Cx=(raw_df.Bx + raw_df.Cx) / 2,
    Cy=(raw_df.Cy + raw_df.Dy) / 2,
)

corrected_df.head()

Unnamed: 0,FileName,IMG_HEIGHT,IMG_WIDTH,ClassName,P,Ax,Ay,Cx,Cy
0,X_000.jpeg,930,2240,Signature,1,1296.0,654.0,2036.0,844.5
1,X_000.jpeg,930,2240,Date,1,1429.5,226.5,1755.0,294.0
2,X_000.jpeg,930,2240,Amount,1,1784.0,338.5,2073.0,408.0
3,X_000.jpeg,930,2240,AcNo,0,-1.0,-1.0,-1.0,-1.0
4,X_001.jpeg,853,2240,Signature,1,1481.5,531.5,2156.0,769.5


### Box Information Data generation

In [36]:
# Convert the two corrected corners (A and C) of each box into
# centre/size form: (Bx, By) is the box centre, Bw/Bh its width and height.
Cx, Ax, Cy, Ay = corrected_df.Cx, corrected_df.Ax, corrected_df.Cy, corrected_df.Ay
# .assign() returns a new frame, so the new columns are never written onto
# the iloc slice of corrected_df (which would raise SettingWithCopyWarning).
# Rows whose corners are all -1 come out as centre (-1, -1) with size 0.
intermediate_df = corrected_df.iloc[:, :5].assign(
    Bx=(Cx + Ax) / 2,  # simplification of Ax + (Cx - Ax) / 2
    By=(Cy + Ay) / 2,  # simplification of Ay + (Cy - Ay) / 2
    Bw=np.abs(Cx - Ax),
    Bh=np.abs(Cy - Ay),
)

intermediate_df.head(10)

Unnamed: 0,FileName,IMG_HEIGHT,IMG_WIDTH,ClassName,P,Bx,By,Bw,Bh
0,X_000.jpeg,930,2240,Signature,1,1666.0,749.25,740.0,190.5
1,X_000.jpeg,930,2240,Date,1,1592.25,260.25,325.5,67.5
2,X_000.jpeg,930,2240,Amount,1,1928.5,373.25,289.0,69.5
3,X_000.jpeg,930,2240,AcNo,0,-1.0,-1.0,0.0,0.0
4,X_001.jpeg,853,2240,Signature,1,1818.75,650.5,674.5,238.0
5,X_001.jpeg,853,2240,Date,1,1856.0,207.0,198.0,49.0
6,X_001.jpeg,853,2240,Amount,1,2062.75,427.25,233.5,50.5
7,X_001.jpeg,853,2240,AcNo,0,-1.0,-1.0,0.0,0.0
8,X_002.jpeg,823,2240,Signature,1,1666.0,593.5,455.0,164.0
9,X_002.jpeg,823,2240,Date,1,1578.5,237.5,490.0,55.0
