In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from pathlib import Path

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Load the raw bar data (Time / Open / Close / Volume) for both symbols.
dego, dia = (
    pd.read_csv(csv_path)
    for csv_path in ("./data/DEGOUSDT.csv", "./data/DIAUSDT.csv")
)

In [3]:
# Summarize the DEGO frame: row count, column dtypes and non-null counts.
dego.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4767 entries, 0 to 4766
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Time    4767 non-null   int64  
 1   Open    4767 non-null   float64
 2   Close   4767 non-null   float64
 3   Volume  4767 non-null   float64
dtypes: float64(3), int64(1)
memory usage: 149.1 KB


In [4]:
# Mean traded volume across all DEGO bars.
average_value = dego.loc[:, "Volume"].mean()

In [5]:
# Display the mean DEGO volume stored in `average_value`.
print(average_value)

6539.160329347598


In [6]:
# Volume level equal to three times the DEGO average.
higher_than = average_value * 3

In [7]:
# Bar-over-bar volume ratio. The first bar has no predecessor, so the shifted
# series is padded with the first volume itself, which makes that ratio 1.
dego_prev_volume = dego["Volume"].shift(fill_value=dego["Volume"].iloc[0])
dia_prev_volume = dia["Volume"].shift(fill_value=dia["Volume"].iloc[0])

dego["volume_change"] = dego["Volume"].div(dego_prev_volume)
dia["volume_change"] = dia["Volume"].div(dia_prev_volume)

In [8]:
# Relative open-to-close move of each bar: (Close - Open) / Open.
dego["amplitude"] = dego["Close"].sub(dego["Open"]).div(dego["Open"])

In [9]:
# Binary label: 1 when the bar's relative move exceeds 0.0025, else 0.
dego["pump"] = dego["amplitude"].gt(0.0025).astype(int)

In [10]:
# Class balance of the pump label (how many bars are flagged 1 versus 0).
dego["pump"].value_counts()

pump
0    4023
1     744
Name: count, dtype: int64

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [12]:
# Column dictionaries used to build the modelling frames:
# `data` holds the DEGO feature plus its label, `new_data` holds the DIA feature only.
data = dict(
    volume_change=dego["volume_change"].to_list(),
    Pump=dego["pump"].to_list(),
)

new_data = dict(volume_change=dia["volume_change"].to_list())

In [13]:
def _clean_volume_change(ratio):
    """Return `ratio` with non-finite entries replaced by usable values.

    Infinite ratios are replaced with 1 *before* the mean is taken. If the
    mean is computed while an infinity is still present it is itself
    infinite, so NaNs would be filled with inf and then rewritten to 1
    instead of receiving the mean.

    Parameters
    ----------
    ratio : pandas.Series
        Volume-change ratios, possibly containing NaN or +/-inf.

    Returns
    -------
    pandas.Series
        A new series; infinities become 1 and NaNs become the mean of the
        resulting finite series. An all-NaN input is returned unchanged.
    """
    finite_ratio = ratio.replace([np.inf, -np.inf], 1)
    return finite_ratio.fillna(finite_ratio.mean())


# Labelled frame (feature + 'Pump' label) built from `data`.
df = pd.DataFrame(data)
df['volume_change'] = _clean_volume_change(df['volume_change'])

# Unlabelled frame built from `new_data`, cleaned the same way.
df2 = pd.DataFrame(new_data)
df2['volume_change'] = _clean_volume_change(df2['volume_change'])


In [14]:
# Hold out 20% of the labelled rows for evaluation (fixed seed for repeatability).
features = df[['volume_change']]
target = df['Pump']
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

# Single-feature frame for the unlabelled data that will be scored later.
X_new_selected = df2.loc[:, ['volume_change']]

In [15]:
from imblearn.under_sampling import RandomUnderSampler
# Resample the training split with a seeded random under-sampler and make the
# resampled data the new training set (the test split is left untouched).
undersampler = RandomUnderSampler(random_state=42)
X_train_resampled, y_train_resampled = undersampler.fit_resample(X_train, y_train)
X_train, y_train = X_train_resampled, y_train_resampled


In [16]:
# Logistic-regression classifier with scikit-learn's default settings.
model = LogisticRegression()

In [17]:
# Fit the classifier on the current training split (X_train / y_train).
model.fit(X_train, y_train)

In [18]:
# Score the held-out split with the fitted classifier.
predictions = model.predict(X_test)

# Headline metrics on the test split.
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
conf_matrix = confusion_matrix(y_true=y_test, y_pred=predictions)
classification_rep = classification_report(y_true=y_test, y_pred=predictions)

# Report.
print(f'Accuracy: {accuracy:.2f}')
print('Confusion Matrix:\n', conf_matrix)
print('Classification Report:\n', classification_rep)

Accuracy: 0.16
Confusion Matrix:
 [[  9 803]
 [  0 142]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.01      0.02       812
           1       0.15      1.00      0.26       142

    accuracy                           0.16       954
   macro avg       0.58      0.51      0.14       954
weighted avg       0.87      0.16      0.06       954



In [19]:

# Rule-based baseline: flag a bar as a pump when its volume ratio exceeds `threshold`.
threshold = 3

# --- training split ---
y_pred_train = X_train['volume_change'].gt(threshold).astype(int)
accuracy_train = accuracy_score(y_true=y_train, y_pred=y_pred_train)
conf_matrix_train = confusion_matrix(y_true=y_train, y_pred=y_pred_train)
classification_rep_train = classification_report(y_true=y_train, y_pred=y_pred_train)

# --- test split ---
y_pred_test = X_test['volume_change'].gt(threshold).astype(int)
accuracy_test = accuracy_score(y_true=y_test, y_pred=y_pred_test)
conf_matrix_test = confusion_matrix(y_true=y_test, y_pred=y_pred_test)
classification_rep_test = classification_report(y_true=y_test, y_pred=y_pred_test)

# --- report ---
print(f'Training Accuracy: {accuracy_train:.2f}')
print('Training Confusion Matrix:\n', conf_matrix_train)
print('Training Classification Report:\n', classification_rep_train)

print(f'\nTesting Accuracy: {accuracy_test:.2f}')
print('Testing Confusion Matrix:\n', conf_matrix_test)
print('Testing Classification Report:\n', classification_rep_test)


Training Accuracy: 0.53
Training Confusion Matrix:
 [[473 129]
 [440 162]]
Training Classification Report:
               precision    recall  f1-score   support

           0       0.52      0.79      0.62       602
           1       0.56      0.27      0.36       602

    accuracy                           0.53      1204
   macro avg       0.54      0.53      0.49      1204
weighted avg       0.54      0.53      0.49      1204


Testing Accuracy: 0.73
Testing Confusion Matrix:
 [[650 162]
 [ 91  51]]
Testing Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.80      0.84       812
           1       0.24      0.36      0.29       142

    accuracy                           0.73       954
   macro avg       0.56      0.58      0.56       954
weighted avg       0.78      0.73      0.76       954



In [None]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ParameterGrid
from sklearn.utils.class_weight import compute_sample_weight


# Hyper-parameter search spaces.
rf_param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'class_weight': ['balanced', 'balanced_subsample', None],
}

gb_param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7],
    'subsample': [0.8, 1.0],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

# --- Gradient boosting ---------------------------------------------------
# Select hyper-parameters with cross-validation on the training data only.
# Picking the winner by its accuracy on X_test would leak the test set into
# model selection and make every test metric optimistic.
# NOTE: 486 parameter combinations x 5 folds; expect this cell to be slow.
balanced_weights = compute_sample_weight(class_weight='balanced', y=y_train)

gb_grid = GridSearchCV(
    GradientBoostingClassifier(random_state=42),
    gb_param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
)
# Extra fit keyword arguments are forwarded to the estimator's fit for each fold.
gb_grid.fit(X_train, y_train, sample_weight=balanced_weights)

best_gb_model = gb_grid.best_estimator_
best_accuracy = gb_grid.best_score_  # mean cross-validated accuracy

# Held-out evaluation of the selected gradient-boosting model.
gb_test_predictions = best_gb_model.predict(X_test)
accuracy = accuracy_score(y_test, gb_test_predictions)
conf_matrix = confusion_matrix(y_test, gb_test_predictions)
classification_rep = classification_report(y_test, gb_test_predictions)

print(f'Best CV Accuracy: {best_accuracy:.2f}')
print('Best Parameters:', gb_grid.best_params_)
print(f'Test Accuracy: {accuracy:.2f}')
print('Confusion Matrix:\n', conf_matrix)
print('Classification Report:\n', classification_rep)

# Score the unlabelled data; print class counts rather than the full list.
predictions = best_gb_model.predict(X_new_selected).tolist()
print('GB predicted class counts on new data:', pd.Series(predictions).value_counts().to_dict())

# --- Random forest -------------------------------------------------------
# The search must actually run: `rf_best_model` is used below and would
# otherwise be undefined.
rf_grid = GridSearchCV(
    RandomForestClassifier(random_state=42),
    rf_param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
)
rf_grid.fit(X_train, y_train)
rf_best_model = rf_grid.best_estimator_

# Held-out evaluation. The metrics are computed from predictions on X_test so
# their length matches y_test; the predictions on the new data have no labels
# to compare against.
rf_test_predictions = rf_best_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_test_predictions)
rf_conf_matrix = confusion_matrix(y_test, rf_test_predictions)
rf_classification_rep = classification_report(y_test, rf_test_predictions)

rf_predictions = rf_best_model.predict(X_new_selected).tolist()
print('RF predicted class counts on new data:', pd.Series(rf_predictions).value_counts().to_dict())


In [None]:
# Print the random-forest evaluation summary.
# This cell only reads rf_accuracy, rf_conf_matrix and rf_classification_rep;
# they must already have been assigned by an earlier cell before it is run.
print("Random Forest Results:")
print(f'Accuracy: {rf_accuracy:.2f}')
print('Confusion Matrix:\n', rf_conf_matrix)
print('Classification Report:\n', rf_classification_rep)
print("\n------------------------\n")

In [None]:
# Open-to-close move of each bar expressed in percent.
dego["change"] = dego["Close"].sub(dego["Open"]).div(dego["Open"]).mul(100)

In [None]:
# Keep the volume ratio only for bars whose volume exceeds 30x the DEGO
# average; every other bar gets 0.
average_volume = np.mean(dego["Volume"])
dego["high_trade_high_pump"] = np.where(
    dego["Volume"] > 30 * average_volume,
    dego["volume_change"],
    0,
)
# "change" is already converted to percent in the cell that creates it.
# Scaling it by 100 again here would inflate it to 10,000x the raw ratio and
# keep growing on every re-run of this cell, so that step is dropped.

In [None]:
# Pairwise correlations of the numeric DEGO columns, then each column's
# correlation with "change", strongest positive first.
corr_matrix = dego.corr(numeric_only=True)
corr_matrix.loc[:, "change"].sort_values(ascending=False)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

FEATURE_NAME = "3xaverage_volume"


def _above_3x_average(volume):
    """Return a 0/1 series marking bars whose volume exceeds 3x the series mean.

    NOTE(review): no cell in this notebook creates the "3xaverage_volume"
    column, so this definition is an assumption based on the column name.
    Confirm it matches the intended feature.
    """
    return (volume > 3 * volume.mean()).astype(int)


# Build the feature explicitly; selecting dego[["3xaverage_volume"]] directly
# raises KeyError because that column does not exist.
X = pd.DataFrame({FEATURE_NAME: _above_3x_average(dego["Volume"])})
y = dego["pump"]

feature_scaler = StandardScaler()
scaled_features = feature_scaler.fit_transform(X)

# NOTE(review): the target is a 0/1 label, so this regression yields
# continuous scores rather than class predictions.
model = LinearRegression()
model.fit(scaled_features, y)

# `some_new_data` was never defined. The DIA frame is the only other data set
# loaded in this notebook, so it is scored here with the same feature
# definition. Presumably this is the intended "new data"; verify.
some_new_data = pd.DataFrame({FEATURE_NAME: _above_3x_average(dia["Volume"])})
scaled_new_data = feature_scaler.transform(some_new_data)
predictions = model.predict(scaled_new_data)



In [None]:
# Show the current contents of `predictions`.
predictions