<a href="https://colab.research.google.com/github/ErdemAslans/ALL-IN-BANKER/blob/main/ALL_IN_BANKER.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pandas numpy scikit-learn seaborn matplotlib
!pip install xgboost lightgbm
!pip install optuna
!pip install imbalanced-learn
!pip install keras tensorflow
!pip install plotly dash
!pip install joblib


Collecting optuna
  Downloading optuna-4.0.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.3-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.5-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.0.0-py3-none-any.whl (362 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m362.8/362.8 kB[0m [31m28.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.13.3-py3-none-any.whl (233 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.2/233.2 kB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Downloading Mako-1.3.5-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: M

In [2]:
# kredi_skorlamasi.py

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import roc_auc_score, classification_report, confusion_matrix
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from imblearn.over_sampling import SMOTE
import optuna
import joblib

def load_data():
    data = pd.read_csv('give_me_some_credit.csv')
    return data

def preprocess_data(data):
    # Eksik değerlerin doldurulması
    data.fillna(data.median(), inplace=True)
    # Özellik ve hedef değişkenlerin ayrılması
    X = data.drop('SeriousDlqin2yrs', axis=1)
    y = data['SeriousDlqin2yrs']
    return X, y

def balance_data(X, y):
    # SMOTE ile dengesiz veri setinin dengelenmesi
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X, y)
    return X_resampled, y_resampled

def feature_engineering(X):
    # Yeni özelliklerin oluşturulması
    X['DebtRatio_MonthlyIncome'] = X['DebtRatio'] * X['MonthlyIncome']
    return X

def optimize_model(X_train, y_train):
    def objective(trial):
        param = {
            'objective': 'binary:logistic',
            'eval_metric': 'auc',
            'tree_method': 'hist',
            'booster': 'gbtree',
            'lambda': trial.suggest_float('lambda', 1e-8, 1.0, log=True),
            'alpha': trial.suggest_float('alpha', 1e-8, 1.0, log=True),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
            'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
            'max_depth': trial.suggest_int('max_depth', 3, 10),
            'random_state': 42,
            'min_child_weight': trial.suggest_int('min_child_weight', 1, 10)
        }
        model = XGBClassifier(**param)
        model.fit(X_train, y_train, verbose=False)
        preds = model.predict_proba(X_train)[:,1]
        auc = roc_auc_score(y_train, preds)
        return auc

    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=50)
    return study.best_params

def train_model(X_train, y_train, params):
    model = XGBClassifier(**params)
    model.fit(X_train, y_train)
    return model

def evaluate_model(model, X_test, y_test):
    preds = model.predict(X_test)
    proba = model.predict_proba(X_test)[:,1]
    print("ROC AUC Skoru:", roc_auc_score(y_test, proba))
    print("Classification Report:")
    print(classification_report(y_test, preds))
    cm = confusion_matrix(y_test, preds)
    sns.heatmap(cm, annot=True, fmt='d')
    plt.show()

def save_model(model, scaler):
    joblib.dump(model, 'models/credit_scoring_model.pkl')
    joblib.dump(scaler, 'models/credit_scaler.pkl')

def main():
    data = load_data()
    X, y = preprocess_data(data)
    X = feature_engineering(X)
    X_resampled, y_resampled = balance_data(X, y)
    X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    best_params = optimize_model(X_train_scaled, y_train)
    model = train_model(X_train_scaled, y_train, best_params)
    evaluate_model(model, X_test_scaled, y_test)
    save_model(model, scaler)

if __name__ == "__main__":
    main()


Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



FileNotFoundError: [Errno 2] No such file or directory: 'give_me_some_credit.csv'

In [3]:
# dolandiricilik_tespiti.py
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

import kagglehub

# Download latest version
data = kagglehub.dataset_download("mlg-ulb/creditcardfraud")

print("Path to dataset files:", data)

def load_data():
    data = kagglehub.dataset_download("mlg-ulb/creditcardfraud")
    print("Path to dataset files:", data)

def preprocess_data(data):
    data.fillna(0, inplace=True)
    X = data.drop('Class', axis=1)
    y = data['Class']
    return X, y

def scale_data(X):
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    return X_scaled, scaler

def build_autoencoder(input_dim):
    model = Sequential()
    model.add(Dense(32, activation='relu', input_dim=input_dim))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(input_dim, activation='linear'))
    model.compile(optimizer='adam', loss='mse')
    return model

def main():
    data = load_data()
    X, y = preprocess_data(data)
    X_scaled, scaler = scale_data(X)
    # Normal işlemlerle eğitme
    X_train = X_scaled[y == 0]
    X_test = X_scaled
    y_test = y
    autoencoder = build_autoencoder(X_train.shape[1])
    autoencoder.fit(X_train, X_train, epochs=10, batch_size=32, shuffle=True, validation_split=0.1)
    reconstructions = autoencoder.predict(X_test)
    mse = np.mean(np.power(X_test - reconstructions, 2), axis=1)
    error_df = pd.DataFrame({'reconstruction_error': mse, 'true_class': y_test})
    threshold = error_df[error_df['true_class'] == 0]['reconstruction_error'].quantile(0.99)
    y_pred = [1 if e > threshold else 0 for e in error_df['reconstruction_error'].values]
    print("ROC AUC Skoru:", roc_auc_score(y_test, y_pred))
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    cm = confusion_matrix(y_test, y_pred)
    sns.heatmap(cm, annot=True, fmt='d')
    plt.show()
    autoencoder.save('models/fraud_detection_autoencoder.h5')
    joblib.dump(scaler, 'models/fraud_scaler.pkl')

if __name__ == "__main__":
    main()


Downloading from https://www.kaggle.com/api/v1/datasets/download/mlg-ulb/creditcardfraud?dataset_version_number=3...


100%|██████████| 66.0M/66.0M [00:03<00:00, 17.5MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/mlg-ulb/creditcardfraud/versions/3
Path to dataset files: /root/.cache/kagglehub/datasets/mlg-ulb/creditcardfraud/versions/3


AttributeError: 'NoneType' object has no attribute 'fillna'

In [None]:
# musteri_segmentasyonu.py

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import silhouette_score
from sklearn.decomposition import PCA
import plotly.express as px
import joblib

def load_data():
    data = pd.read_csv('bank_marketing.csv')
    return data

def preprocess_data(data):
    data.fillna(method='ffill', inplace=True)
    categorical_cols = ['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'poutcome']
    data = pd.get_dummies(data, columns=categorical_cols)
    return data

def scale_data(data):
    scaler = StandardScaler()
    data_scaled = scaler.fit_transform(data)
    return data_scaled, scaler

def reduce_dimensions(data_scaled):
    pca = PCA(n_components=3)
    data_pca = pca.fit_transform(data_scaled)
    return data_pca, pca

def cluster_data(data_pca):
    model = AgglomerativeClustering(n_clusters=4)
    clusters = model.fit_predict(data_pca)
    return clusters, model

def visualize_clusters(data_pca, clusters):
    fig = px.scatter_3d(x=data_pca[:,0], y=data_pca[:,1], z=data_pca[:,2], color=clusters.astype(str))
    fig.show()

def save_models(scaler, pca, model):
    joblib.dump(scaler, 'models/segment_scaler.pkl')
    joblib.dump(pca, 'models/segment_pca.pkl')
    joblib.dump(model, 'models/segment_model.pkl')

def main():
    data = load_data()
    data_processed = preprocess_data(data)
    data_scaled, scaler = scale_data(data_processed)
    data_pca, pca = reduce_dimensions(data_scaled)
    clusters, model = cluster_data(data_pca)
    data['segment'] = clusters
    silhouette_avg = silhouette_score(data_scaled, clusters)
    print("Silhouette Score:", silhouette_avg)
    visualize_clusters(data_pca, clusters)
    data.to_csv('customer_segments.csv', index=False)
    save_models(scaler, pca, model)

if __name__ == "__main__":
    main()


In [4]:
# gelir_tahmini.py

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping
import optuna
import joblib
import kagglehub

def load_data():
    # Download latest version
    path = kagglehub.dataset_download("kamaumunyori/income-prediction-dataset-us-20th-century-data")

    print("Path to dataset files:", path)

def preprocess_data(data):
    data.fillna(method='ffill', inplace=True)
    categorical_cols = data.select_dtypes(include=['object']).columns
    data = pd.get_dummies(data, columns=categorical_cols)
    X = data.drop('income', axis=1)
    y = data['income']
    return X, y

def scale_data(X):
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    return X_scaled, scaler

def build_model(trial, input_dim):
    model = Sequential()
    n_layers = trial.suggest_int('n_layers', 1, 5)
    for i in range(n_layers):
        num_units = trial.suggest_int('n_units_l{}'.format(i), 32, 256)
        model.add(Dense(num_units, activation='relu'))
        dropout_rate = trial.suggest_float('dropout_l{}'.format(i), 0.0, 0.5)
        model.add(Dropout(dropout_rate))
    model.add(Dense(1, activation='linear'))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

def optimize_model(X_train, y_train, input_dim):
    def objective(trial):
        model = build_model(trial, input_dim)
        es = EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=5)
        history = model.fit(X_train, y_train, validation_split=0.1, callbacks=[es], epochs=50, batch_size=32, verbose=0)
        loss = history.history['val_loss'][-1]
        return loss
    study = optuna.create_study(direction='minimize')
    study.optimize(objective, n_trials=20)
    return study.best_params

def train_model(X_train, y_train, params, input_dim):
    model = build_model(optuna.trial.FixedTrial(params), input_dim)
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)
    model.fit(X_train, y_train, validation_split=0.1, callbacks=[es], epochs=100, batch_size=32, verbose=1)
    return model

def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    print("Mean Absolute Error:", mae)

def main():
    data = load_data()
    X, y = preprocess_data(data)
    X_scaled, scaler = scale_data(X)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
    input_dim = X_train.shape[1]
    best_params = optimize_model(X_train, y_train, input_dim)
    model = train_model(X_train, y_train, best_params, input_dim)
    evaluate_model(model, X_test, y_test)
    model.save('models/income_prediction_model.h5')
    joblib.dump(scaler, 'models/income_scaler.pkl')

if __name__ == "__main__":
    main()


Downloading from https://www.kaggle.com/api/v1/datasets/download/kamaumunyori/income-prediction-dataset-us-20th-century-data?dataset_version_number=1...


100%|██████████| 9.02M/9.02M [00:01<00:00, 5.86MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/kamaumunyori/income-prediction-dataset-us-20th-century-data/versions/1


AttributeError: 'NoneType' object has no attribute 'fillna'

In [8]:
# finansal_saglik_skoru.py

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import kagglehub
def load_data():
    # Download latest version
    data = kagglehub.dataset_download("teertha/personal-loan-modeling")

    print("Path to dataset files:", data)

def preprocess_data(data):
    data.fillna(method='ffill', inplace=True)
    return data

def calculate_scores(data):
    data['DebtIncomeRatio'] = data['TotalDebt'] / data['AnnualIncome']
    data['CreditUtilization'] = data['CurrentBalance'] / data['CreditLimit']
    data['PaymentHistoryScore'] = data['OnTimePayments'] / data['TotalPayments']
    data['LengthOfCreditHistory'] = data['CreditHistoryYears']
    # Özellikleri normalleştirme
    scaler = MinMaxScaler()
    features = ['DebtIncomeRatio', 'CreditUtilization', 'PaymentHistoryScore', 'LengthOfCreditHistory']
    data[features] = scaler.fit_transform(data[features])
    # Ağırlıklı skor hesaplama
    weights = {'DebtIncomeRatio': 0.3, 'CreditUtilization': 0.3, 'PaymentHistoryScore': 0.3, 'LengthOfCreditHistory': 0.1}
    data['FinancialHealthScore'] = (data['DebtIncomeRatio'] * weights['DebtIncomeRatio'] +
                                    data['CreditUtilization'] * weights['CreditUtilization'] +
                                    data['PaymentHistoryScore'] * weights['PaymentHistoryScore'] +
                                    data['LengthOfCreditHistory'] * weights['LengthOfCreditHistory']) * 100
    return data

def main():
    data = load_data()
    data = preprocess_data(data)
    data = calculate_scores(data)
    data.to_csv('financial_health_scores.csv', index=False)
    print("Finansal Sağlık Skoru Hesaplandı ve Kaydedildi.")

if __name__ == "__main__":
    main()


Path to dataset files: /root/.cache/kagglehub/datasets/teertha/personal-loan-modeling/versions/1


TypeError: 'NoneType' object is not subscriptable

In [None]:
# crm_entegrasyonu.py

import pandas as pd
import numpy as np

def load_data():
    crm_data = pd.read_csv('crm_data.csv')
    segments = pd.read_csv('customer_segments.csv')
    return crm_data, segments

def merge_data(crm_data, segments):
    data = crm_data.merge(segments[['customer_id', 'segment']], on='customer_id', how='left')
    return data

def analyze_behavior(data):
    behavior = data.groupby('segment').agg({
        'purchase_amount': ['mean', 'sum'],
        'interaction_count': 'mean',
        'customer_lifetime_value': 'mean'
    })
    behavior.columns = ['_'.join(col).strip() for col in behavior.columns.values]
    behavior.reset_index(inplace=True)
    behavior.to_csv('segment_behavior.csv', index=False)
    return behavior

def main():
    crm_data, segments = load_data()
    data = merge_data(crm_data, segments)
    behavior = analyze_behavior(data)
    print("Segment Davranış Analizi Tamamlandı ve Kaydedildi.")

if __name__ == "__main__":
    main()


In [None]:
# raporlama_dashboard.py

import pandas as pd
import plotly.express as px
import dash
from dash import dcc, html
from dash.dependencies import Input, Output

def load_data():
    financial_data = pd.read_csv('financial_analytics.csv')
    return financial_data

def create_dashboard(financial_data):
    app = dash.Dash(__name__)

    app.layout = html.Div(children=[
        html.H1(children='Finansal Analitik Dashboard'),
        dcc.Dropdown(
            id='metric-dropdown',
            options=[
                {'label': 'Gelir', 'value': 'income'},
                {'label': 'Varlıklar', 'value': 'assets'},
                {'label': 'Yükümlülükler', 'value': 'liabilities'}
            ],
            value='income'
        ),
        dcc.Graph(id='metric-graph')
    ])

    @app.callback(
        Output('metric-graph', 'figure'),
        [Input('metric-dropdown', 'value')]
    )
    def update_graph(selected_metric):
        fig = px.line(financial_data, x='month', y=selected_metric, title=f'Aylık {selected_metric.capitalize()} Trendleri')
        return fig

    app.run_server(debug=True)

def main():
    financial_data = load_data()
    create_dashboard(financial_data)

if __name__ == "__main__":
    main()


In [None]:
# all_in_banker_main.py

def main():
    print("ALL-IN-BANKER Platformu Başlatılıyor...\n")
    import kredi_skorlamasi
    kredi_skorlamasi.main()
    import dolandiricilik_tespiti
    dolandiricilik_tespiti.main()
    import musteri_segmentasyonu
    musteri_segmentasyonu.main()
    import gelir_tahmini
    gelir_tahmini.main()
    import finansal_saglik_skoru
    finansal_saglik_skoru.main()
    import crm_entegrasyonu
    crm_entegrasyonu.main()
    import raporlama_dashboard
    raporlama_dashboard.main()
    print("\nALL-IN-BANKER Platformu Başarıyla Çalıştırıldı.")

if __name__ == "__main__":
    main()
