Сохранить на Google Диске. Вы можете сохранить файл Python на Google Диске, а затем загружать его оттуда при необходимости.

Чтобы сохранить на Google Диск:

In [None]:
%%writefile /content/drive/MyDrive/BINANCE/data_processor.py

Writing /content/drive/MyDrive/BINANCE/data_processor.py


In [None]:
from sklearn import preprocessing
import pandas as pd
import logging
import matplotlib.pyplot as plt
import pickle
from google.colab import drive

# Configure root logging at INFO level and create the logger used by the
# functions below, named after the current module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Function to load and preprocess data
def load_and_preprocess_data(file_path):
    """Load a CSV file into a pandas DataFrame.

    Despite the name, no preprocessing is applied here: the file is only read.

    Args:
        file_path: Path of the file to read, as a ``str`` or ``os.PathLike``.
            Only the ``.csv`` extension is supported; it is matched
            case-insensitively so ``data.CSV`` is accepted as well.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` when the extension is not
        supported or the file does not exist. Both failures are logged.
    """
    import os  # local import so the function does not depend on import order

    # Normalise path-like objects to a plain string before inspecting the suffix.
    path = os.fspath(file_path)
    if not path.lower().endswith('.csv'):
        logger.error(f"Unsupported file format: {file_path}")
        return None

    try:
        return pd.read_csv(path)
    except FileNotFoundError:
        logger.error(f"File {file_path} not found.")
        return None

def test_load_and_preprocess_data_v1():
    """Check that the trade export on Google Drive can be loaded."""
    csv_path = '/content/drive/MyDrive/BINANCE/export_oracle_10_10_23.csv'
    loaded = load_and_preprocess_data(csv_path)
    # The loader signals every failure by returning None.
    assert loaded is not None, f"Failed to load data from {csv_path}"

def test_load_and_preprocess_data_v2():
    """Check that loading the trade export yields a result other than None."""
    export_csv = '/content/drive/MyDrive/BINANCE/export_oracle_10_10_23.csv'
    frame = load_and_preprocess_data(export_csv)
    # A None result means the loader rejected or could not find the file.
    assert frame is not None, f"Failed to load data from {export_csv}"

df = load_and_preprocess_data('/content/drive/MyDrive/BINANCE/export_oracle_10_10_23.csv')
# The loader returns None on failure (and logs the reason), so only call
# DataFrame methods when something was actually loaded.
if df is not None:
    print(df.head())  # Print the first few rows of the DataFrame

import matplotlib.pyplot as plt
import pdb
# Reload the trade export; the result is None if loading failed.
df = load_and_preprocess_data('/content/drive/MyDrive/BINANCE/export_oracle_10_10_23.csv')
# NOTE(review): the histogram call below is disabled, so nothing in this cell
# draws a figure before plt.show() is called.
#df.hist()
plt.show()


import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import os
import logging

def load_and_preprocess_data(file_path):
    """Load a CSV file into a pandas DataFrame.

    This definition rebinds the name ``load_and_preprocess_data``, so it must
    be a working loader: an empty placeholder here would make every later call
    silently return ``None``.

    Args:
        file_path: Path of the file to read. Only ``.csv`` files are supported
            (the extension is matched case-insensitively).

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` when the extension is not
        supported or the file does not exist. Both failures are logged.
    """
    log = logging.getLogger(__name__)

    if not str(file_path).lower().endswith('.csv'):
        log.error(f"Unsupported file format: {file_path}")
        return None

    try:
        return pd.read_csv(file_path)
    except FileNotFoundError:
        log.error(f"File {file_path} not found.")
        return None

class DataProcessor:
    """Read a trade-export CSV, preprocess it and persist the results.

    ``process`` runs the whole pipeline: read the CSV at ``file_path``,
    forward-fill missing values, parse the time columns, encode the Y/N
    market-maker flag, min-max scale PRICE and QUANTITY, pickle the fitted
    scaler to ``scaler_path`` and write the processed frame to CSV.
    """

    # Destination used by ``save_dataframe`` when no explicit path is given.
    DEFAULT_OUTPUT_PATH = "/content/drive/MyDrive/BINANCE/processed_data.csv"

    def __init__(self, file_path: str, scaler_path: str):
        """Store the input/output paths and set up logging.

        Args:
            file_path: Path of the CSV file to read.
            scaler_path: Path the fitted scaler is pickled to.
        """
        self.file_path = file_path
        self.scaler_path = scaler_path
        self.df = None  # populated by read_data()
        self.logger = self.configure_logging()

    @staticmethod
    def configure_logging():
        """Configure root logging at DEBUG level and return this module's logger."""
        logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        return logging.getLogger(__name__)

    def read_data(self):
        """Read the CSV at ``file_path`` into ``self.df``.

        The ``EVENT_TYPE`` column is dropped when present. On any failure the
        error is logged and ``self.df`` keeps its previous value.
        """
        if not os.path.exists(self.file_path):
            self.logger.error(f"File {self.file_path} not found.")
            return

        try:
            self.df = pd.read_csv(self.file_path)
            if 'EVENT_TYPE' in self.df.columns:
                self.df.drop(columns=['EVENT_TYPE'], inplace=True)
            self.logger.info(f'Successfully read data from {self.file_path}')
        except Exception as e:
            self.logger.error(f"Unexpected error while reading data from {self.file_path}: {e}", exc_info=True)

    @staticmethod
    def handle_missing_values(df):
        """Forward-fill missing values in ``df`` in place and return it."""
        df.ffill(inplace=True)
        return df

    @staticmethod
    def convert_to_datetime(df, columns):
        """Convert each of ``columns`` in ``df`` to datetime and return ``df``."""
        # NOTE(review): no explicit format is passed, so pandas infers it;
        # confirm day/month order if more than the hour is ever used.
        for col in columns:
            df[col] = pd.to_datetime(df[col])
        return df

    @staticmethod
    def scale_columns(df, columns):
        """Min-max scale ``columns`` of ``df`` in place.

        Returns:
            A ``(df, scaler)`` tuple where ``scaler`` is the fitted
            ``MinMaxScaler``.
        """
        scaler = MinMaxScaler()
        df[columns] = scaler.fit_transform(df[columns])
        return df, scaler

    def preprocess(self):
        """Preprocess ``self.df`` and return the fitted scaler.

        Returns:
            The fitted scaler, or ``None`` when no dataframe has been loaded.
            A single ``None`` (rather than a tuple) is returned on failure so
            callers can test the result reliably.
        """
        if self.df is None:
            self.logger.error("Dataframe is None in preprocess method.")
            return None

        required_columns = ["EVENT_TIME", "TRADE_TIME", "PRICE", "QUANTITY", "IS_BUYER_MARKET_MAKER"]
        missing_columns = [col for col in required_columns if col not in self.df.columns]

        if missing_columns:
            self.logger.warning(f"Missing required columns in the DataFrame: {missing_columns}")
            # Create the absent columns as all-missing so later steps can index them.
            for col in missing_columns:
                self.df[col] = None

        self.df = self.handle_missing_values(self.df)
        self.df = self.convert_to_datetime(self.df, ["EVENT_TIME", "TRADE_TIME"])

        self.df['IS_BUYER_MARKET_MAKER'] = self.df['IS_BUYER_MARKET_MAKER'].map({'Y': 1, 'N': 0}, na_action='ignore')
        self.df['TRADE_HOUR'] = self.df['TRADE_TIME'].dt.hour

        self.df, scaler = self.scale_columns(self.df, ["PRICE", "QUANTITY"])

        # The raw timestamps are no longer needed once TRADE_HOUR is derived.
        self.df.drop(columns=["EVENT_TIME", "TRADE_TIME"], inplace=True)

        return scaler

    def save_scaler(self, scaler):
        """Pickle ``scaler`` to ``scaler_path``, creating its directory if needed.

        Errors while writing the file are logged rather than raised.
        """
        directory = os.path.dirname(self.scaler_path)
        # A bare file name has an empty dirname; os.makedirs('') would raise.
        if directory:
            os.makedirs(directory, exist_ok=True)

        try:
            with open(self.scaler_path, 'wb') as f:
                pickle.dump(scaler, f)
            self.logger.info(f"Scaler saved to {self.scaler_path}")
        except Exception as e:
            self.logger.error(f"Error saving scaler: {e}", exc_info=True)

    def process(self):
        """Run the full pipeline and return the processed dataframe.

        Returns:
            The processed dataframe, or ``None`` when the input could not be
            read (in which case nothing is saved).
        """
        self.read_data()
        if self.df is None:
            # read_data() already logged the reason; there is nothing to process.
            return None

        scaler = self.preprocess()
        if scaler is not None:
            self.save_scaler(scaler)
        self.show_dataframe()
        self.save_dataframe()
        return self.df

    def show_dataframe(self):
        """Display the dataframe, or log an error when there is none."""
        if self.df is None:
            self.logger.error("Dataframe is not available or hasn't been processed yet.")
            return

        # ``display`` only exists as a builtin inside IPython/Colab sessions;
        # fall back to print() so the class also works in a plain script.
        try:
            from IPython.display import display
        except ImportError:
            print(self.df)
        else:
            display(self.df)

    def save_dataframe(self, output_path=None):
        """Write the dataframe to CSV without the index.

        Args:
            output_path: Destination file. Defaults to ``DEFAULT_OUTPUT_PATH``.

        Errors while writing are logged rather than raised.
        """
        target = self.DEFAULT_OUTPUT_PATH if output_path is None else output_path

        if self.df is None:
            self.logger.error("Dataframe is not available or hasn't been processed yet.")
            return

        try:
            self.df.to_csv(target, index=False)
            self.logger.info(f"Dataframe saved to {target}")
        except Exception as e:
            self.logger.error(f"Error saving dataframe: {e}", exc_info=True)


# Main execution
if __name__ == '__main__':
    # Mount Google Drive so the /content/drive paths below resolve.
    drive.mount('/content/drive')

    # Input trade export and the destination for the pickled scaler.
    file_path = '/content/drive/MyDrive/BINANCE/export_oracle_10_10_23.csv'
    scaler_path = '/content/drive/MyDrive/BINANCE/scaler.pkl'

    # Run the full pipeline; process() returns the processor's dataframe.
    processor = DataProcessor(file_path, scaler_path)
    processed_df = processor.process()

  EVENT_TYPE         EVENT_TIME   SYMBOL    TRADE_ID     PRICE  QUANTITY  \
0      trade  11.10.23 11:38:38  BTCUSDT  3235808193  27219.32   0.00970   
1      trade  11.10.23 11:38:38  BTCUSDT  3235808194  27219.33   0.00200   
2      trade  11.10.23 11:38:38  BTCUSDT  3235808198  27219.33   0.00100   
3      trade  11.10.23 11:38:38  BTCUSDT  3235808196  27219.33   0.00509   
4      trade  11.10.23 11:38:38  BTCUSDT  3235808197  27219.33   0.00200   

   BUYER_ORDER_ID  SELLER_ORDER_ID         TRADE_TIME IS_BUYER_MARKET_MAKER  
0     22695317626      22695318752  11.10.23 11:38:38                     Y  
1     22695318754      22695317119  11.10.23 11:38:38                     N  
2     22695318815      22695317119  11.10.23 11:38:38                     N  
3     22695318760      22695317119  11.10.23 11:38:38                     N  
4     22695318786      22695317119  11.10.23 11:38:38                     N  
Drive already mounted at /content/drive; to attempt to forcibly remount, ca

Unnamed: 0,SYMBOL,TRADE_ID,PRICE,QUANTITY,BUYER_ORDER_ID,SELLER_ORDER_ID,IS_BUYER_MARKET_MAKER,TRADE_HOUR
0,BTCUSDT,3235808193,0.382538,0.000266,22695317626,22695318752,1,11
1,BTCUSDT,3235808194,0.382553,0.000055,22695318754,22695317119,0,11
2,BTCUSDT,3235808198,0.382553,0.000027,22695318815,22695317119,0,11
3,BTCUSDT,3235808196,0.382553,0.000140,22695318760,22695317119,0,11
4,BTCUSDT,3235808197,0.382553,0.000055,22695318786,22695317119,0,11
...,...,...,...,...,...,...,...,...
1212416,BTCUSDT,3234595783,0.897415,0.003802,22680259340,22680258223,0,11
1212417,BTCUSDT,3234595784,0.897415,0.000020,22680259340,22680258246,0,11
1212418,BTCUSDT,3234595785,0.897415,0.000242,22680259340,22680258531,0,11
1212419,BTCUSDT,3234595786,0.897415,0.000144,22680259340,22680259037,0,11


In [None]:
# Read the whole module file from Drive, then echo it once the handle is closed.
with open('/content/drive/MyDrive/BINANCE/data_processor.py', 'r') as file:
    content = file.read()
print(content)


 


In [None]:
file_path = '/content/drive/MyDrive/BINANCE/data_processor.py'

# Report the on-disk size first so an empty file can be flagged without reading it.
file_size = os.path.getsize(file_path)
print(f"File size: {file_size} bytes")

if file_size == 0:
    print("The file is empty.")
else:
    # Non-empty file: dump its full text.
    with open(file_path, 'r') as file:
        content = file.read()
        print(content)


File size: 1 bytes
 
