# RP Data Set Creator (Numpy ver)

This file create dataset based on Csv file we gave to it,
out put income is some torch.tensor file's based on RP (Recurrence Plot) technique

## Libraries

In [3]:
import os
import torch
import numpy as np
import pandas as pd

## RP technique

In [8]:
class RecurrencePlot:
    def __init__(self, input_address, output_address, iteration=0, window_size=320, start=0, end_point=24960):
        self.input_address = input_address
        self.output_address = output_address
        self.window_size = window_size
        self.start = start
        self.end_point = end_point
        self.chunks_dict = {}
        self.iteration = iteration
        self.load_data()
        self.Saveon()

    def recurrence_plot_magnitude(self, A):
        N = len(A)
        distance_matrix = np.zeros((N, N))
        for i in range(N):
            for j in range(N):
                distance_matrix[i, j] = np.abs(A[i] - A[j])
        return distance_matrix

    def load_data(self):
        chunks_dict = {}
        address = self.input_address
        pre_df = pd.read_csv(address)
        df = pre_df.iloc[15:, [1, 3, 5]].values.astype(float)
        df = df[:, 0]
        for i in range(self.start, self.end_point, self.window_size):
            chunks_dict[i // self.window_size] = df[i:i + self.window_size]
        self.chunks_dict = chunks_dict
        os.makedirs(self.output_address, exist_ok=True)

    def Saveon(self):
        for key, chunk in self.chunks_dict.items():
            matrix = self.recurrence_plot_magnitude(chunk)

            # Convert to a NumPy array
            np_array = np.array(matrix, dtype=np.float32)

            base_filename = os.path.basename(self.input_address).replace('.csv', '')
            filename_array = f'{base_filename}_iter{key + self.iteration}.npy'
            full_path_array = os.path.join(self.output_address, filename_array)
            try:
                # Save as a .npy file
                np.save(full_path_array, np_array)
                print(f"Saved NumPy array: {full_path_array}, Array shape: {np_array.shape}")
            except Exception as e:
                print(f"Error saving array for chunk {key}: {e}")


## Creator

In [11]:
directory_out = r"G:\Thesis_Numpy_data_set\2_Class_320\2"
directory_in = r"G:\Thesis\Data Set\Machin_Train_URF\Machin_Train_URF\60%\1420"
limit, count = 5, 0
iteration = 0

for filename in sorted(os.listdir(directory_in)):
    if filename.endswith(".CSV") or filename.endswith(".csv"):  # Ensure only CSV files are processed
        if count == limit:
            break
        input_file_path = os.path.join(directory_in, filename)  # Create full path for each file
        RecurrencePlot(input_file_path, directory_out, iteration=iteration)

        count += 1
        # iteration = count * 156  # for 160by160
        iteration = count * 78  # for 320by320
        print(input_file_path)

Saved NumPy array: G:\Thesis_Numpy_data_set\2_Class_320\2\ALL0001.CSV_iter0.npy, Array shape: (320, 320)
Saved NumPy array: G:\Thesis_Numpy_data_set\2_Class_320\2\ALL0001.CSV_iter1.npy, Array shape: (320, 320)
Saved NumPy array: G:\Thesis_Numpy_data_set\2_Class_320\2\ALL0001.CSV_iter2.npy, Array shape: (320, 320)
Saved NumPy array: G:\Thesis_Numpy_data_set\2_Class_320\2\ALL0001.CSV_iter3.npy, Array shape: (320, 320)
Saved NumPy array: G:\Thesis_Numpy_data_set\2_Class_320\2\ALL0001.CSV_iter4.npy, Array shape: (320, 320)
Saved NumPy array: G:\Thesis_Numpy_data_set\2_Class_320\2\ALL0001.CSV_iter5.npy, Array shape: (320, 320)
Saved NumPy array: G:\Thesis_Numpy_data_set\2_Class_320\2\ALL0001.CSV_iter6.npy, Array shape: (320, 320)
Saved NumPy array: G:\Thesis_Numpy_data_set\2_Class_320\2\ALL0001.CSV_iter7.npy, Array shape: (320, 320)
Saved NumPy array: G:\Thesis_Numpy_data_set\2_Class_320\2\ALL0001.CSV_iter8.npy, Array shape: (320, 320)
Saved NumPy array: G:\Thesis_Numpy_data_set\2_Class_320