In [None]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Loading data
train_transaction = pd.read_csv('train_transaction.csv')
train_identity = pd.read_csv('train_identity.csv')
test_transaction = pd.read_csv('test_transaction.csv')
test_identity = pd.read_csv('test_identity.csv')

# Left joins keep every transaction even when it has no identity record.
train = pd.merge(train_transaction, train_identity, on='TransactionID', how='left')
test = pd.merge(test_transaction, test_identity, on='TransactionID', how='left')

# Reconcile test column names that differ from train only by '-' vs '_'.
# NOTE(review): presumably the identity columns are spelled differently in the
# two files (e.g. `id-01` vs `id_01`) -- confirm against the data. The rename
# is a no-op when no such columns exist, and never overwrites a column that
# already matches.
train_column_names = set(train.columns)
renamed_test_cols = {
    col: col.replace('-', '_')
    for col in test.columns
    if col not in train_column_names
    and col.replace('-', '_') in train_column_names
}
test = test.rename(columns=renamed_test_cols)

# Separating features and target
X = train.drop('isFraud', axis=1)
y = train['isFraud']

# Division into numerical and categorical attributes.
# 'number' covers every numeric width (int32, float32, ...), not only 64-bit.
numerical_cols = X.select_dtypes(include='number').columns
categorical_cols = X.select_dtypes(include=['object']).columns

# Stratify so the hold-out set keeps the same fraud ratio as the training set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Creating a data processing pipeline.
# Imputation lives inside the pipeline so that fill values are learned from the
# training fold only and then reused unchanged for validation and test data.
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='NotFound')),
    ('encoder', OneHotEncoder(handle_unknown='ignore')),
])
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols),
    ]
)

# Creating a model
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42, n_jobs=-1))
])

# Training the model
model.fit(X_train, y_train)

# Model evaluation on the hold-out split
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1-Score:", f1_score(y_test, y_pred))

# Prediction on test data.
# Reindex to the training feature columns: columns absent from the test set
# become NaN (and are then imputed by the pipeline) instead of a constant 0,
# and no placeholder target column is added.
test_features = test.reindex(columns=X.columns)
test_pred = model.predict(test_features)

# Preparing the file to be sent
submission = pd.DataFrame({
    'TransactionID': test['TransactionID'],
    'isFraud': test_pred
})
submission.to_csv('submission.csv', index=False)

FileNotFoundError: [Errno 2] No such file or directory: 'train_transaction.csv'

Accuracy: 0.9782910556439869

Precision: 0.953023758099352

Recall: 0.4160773220179161

F1-Score: 0.5792582868395143