In [16]:
import pandas as pd
import os
from datetime import datetime

os.makedirs("data/clean", exist_ok=True)

# === Customer data ===
# Phone and Personnummer are read as text. Left to type inference, a column
# holding only digits is parsed as a number, which drops leading zeros and makes
# the `.str` filters below raise AttributeError. Missing values still load as NaN.
customers_raw = pd.read_csv(
    "data/sebank_customers_with_accounts.csv",
    dtype={"Phone": str, "Personnummer": str},
)
print(f"Totalt kunder innan validering: {len(customers_raw)}")

# Drop rows that have no Customer or no BankAccount
customers_df = customers_raw.dropna(subset=["Customer", "BankAccount"])
print(f"Efter dropna på Customer och BankAccount: {len(customers_df)}")

# Phone: optional leading +, then 7-15 digits, whitespace characters or hyphens.
# na=False rejects rows whose phone number is missing.
phone_pattern = r"^\+?[\d\s\-]{7,15}$"
has_valid_phone = customers_df["Phone"].str.contains(phone_pattern, na=False)
customers_df = customers_df[has_valid_phone]
print(f"Efter telefonfilter: {len(customers_df)}")

# Personnummer: 6 digits, an optional - or + separator, then 4 digits.
# na=False rejects rows whose personnummer is missing.
personnummer_pattern = r"^\d{6}[-+]?\d{4}$"
has_valid_personnummer = customers_df["Personnummer"].str.contains(personnummer_pattern, na=False)
customers_df = customers_df[has_valid_personnummer]
print(f"Efter personnummerfilter: {len(customers_df)}")

# Keep only the first row seen for each BankAccount
customers_df = customers_df.drop_duplicates(subset="BankAccount")
print(f"Efter drop_duplicates på BankAccount: {len(customers_df)}")

# Save the cleaned customer data
customers_df.to_csv("data/clean/customers_clean.csv", index=False)
print("Kunddata sparad i data/clean/customers_clean.csv\n")


# === Transaction data ===
# Load the raw file, replace missing notes with a placeholder text and parse the
# timestamps; values that cannot be parsed become NaT instead of raising.
transactions_raw = pd.read_csv("data/transactions.csv")
transactions_df = transactions_raw.assign(
    notes=transactions_raw["notes"].fillna("ingen kommentar"),
    timestamp=pd.to_datetime(transactions_raw["timestamp"], errors='coerce'),
)

print(f"Totalt transaktioner innan validering: {len(transactions_df)}")

# Amount must be at least 0.01
amount_ok = transactions_df["amount"].ge(0.01)
transactions_df = transactions_df.loc[amount_ok]
print(f"Efter filter på amount >= 0.01: {len(transactions_df)}")

# Currency: exactly 3 uppercase letters (rows with a missing currency are rejected)
currency_ok = transactions_df["currency"].str.match(r"^[A-Z]{3}$", na=False)
transactions_df = transactions_df.loc[currency_ok]
print(f"Efter valutafilter: {len(transactions_df)}")

# Drop rows without notes. Missing notes were already replaced by the placeholder
# above, so this step removes nothing; it is kept so the printed report is unchanged.
notes_present = transactions_df["notes"].notna()
transactions_df = transactions_df.loc[notes_present]
print(f"Efter dropna på notes: {len(transactions_df)}")

# Keep only the first row seen for each transaction_id
repeated_id = transactions_df.duplicated(subset=["transaction_id"], keep="first")
transactions_df = transactions_df.loc[~repeated_id]
print(f"Efter drop_duplicates på transaction_id: {len(transactions_df)}")

# Save the cleaned transaction data
transactions_df.to_csv("data/clean/transactions_clean.csv", index=False)
print("Transaktionsdata sparad i data/clean/transactions_clean.csv\n")


# === Flag suspicious transactions ===
# Every rule below appends records with these keys; flagged_df and the saved CSV
# use the same column order.
FLAG_COLUMNS = ["transaction_id", "reason", "flagged_date", "amount"]

HIGH_AMOUNT_LIMIT = 100_000   # rule 2: amounts strictly above this are flagged
BURST_SIZE = 4                # rule 4: this many transactions from one sender ...
BURST_WINDOW_SECONDS = 60     # ... at most this many seconds from first to last

# Computed once so every record produced by this run carries the same date,
# even if the run happens to span midnight.
flagged_date = datetime.today().strftime("%Y-%m-%d")


def _flag_records(rows, reason):
    """Return one flag record (dict with the FLAG_COLUMNS keys) per row of `rows`.

    rows   -- DataFrame with at least the columns transaction_id and amount
    reason -- text stored in the "reason" field of every record
    """
    return [
        {
            "transaction_id": transaction_id,
            "reason": reason,
            "flagged_date": flagged_date,
            "amount": amount,
        }
        for transaction_id, amount in zip(rows["transaction_id"], rows["amount"])
    ]


flagged = []

# 1. Sender and receiver are the same account
samma_konto = transactions_df[transactions_df["sender_account"] == transactions_df["receiver_account"]]
flagged.extend(_flag_records(samma_konto, "Samma konto som avsändare och mottagare"))

# 2. High amount
# NOTE(review): the reason text says SEK, but the currency column is not checked
# here, so amounts in any currency are compared against the limit -- confirm intent.
flagged.extend(
    _flag_records(
        transactions_df[transactions_df["amount"] > HIGH_AMOUNT_LIMIT],
        "Belopp över 100 000 SEK",
    )
)

# 3. Negative amount (should not be present after the amount >= 0.01 filter,
#    kept as a double check)
flagged.extend(_flag_records(transactions_df[transactions_df["amount"] < 0], "Negativt belopp"))

# 4. Several transactions from the same account within 1 minute:
#    BURST_SIZE consecutive transactions (ordered by time) whose first and last
#    timestamps are at most BURST_WINDOW_SECONDS apart.
tx = transactions_df.sort_values(by=["sender_account", "timestamp"])
for account, group in tx.groupby("sender_account"):
    timestamps = group["timestamp"].tolist()
    burst_positions = set()
    for start in range(len(timestamps) - BURST_SIZE + 1):
        elapsed = (timestamps[start + BURST_SIZE - 1] - timestamps[start]).total_seconds()
        # A window touching an unparseable timestamp (NaT) yields NaN here and
        # therefore never satisfies the comparison.
        if elapsed <= BURST_WINDOW_SECONDS:
            burst_positions.update(range(start, start + BURST_SIZE))
    # Overlapping windows share rows; collecting positions first emits each row
    # once, in time order, instead of once per window it belongs to.
    if burst_positions:
        flagged.extend(
            _flag_records(group.iloc[sorted(burst_positions)], "Flera transaktioner inom 1 minut")
        )

# Passing the column list keeps the schema (and the CSV header) intact when no
# transaction was flagged; the duplicate drop is kept as a safety net.
flagged_df = pd.DataFrame(flagged, columns=FLAG_COLUMNS).drop_duplicates(
    subset=["transaction_id", "reason"]
)

# Save the flagged transactions
flagged_df.to_csv("data/clean/flagged_transactions.csv", index=False)
print("Flaggade transaktioner sparade i data/clean/flagged_transactions.csv\n")

print("Klart! Validering och flaggning klar.")








Error while processing DataContextConfig: datasources notebooks validations_store_name evaluation_parameter_store_name include_rendered_content anonymous_usage_statistics
Encountered errors during loading config.  See ValidationError for more details.


InvalidDataContextConfigError: Error while processing DataContextConfig: datasources notebooks validations_store_name evaluation_parameter_store_name include_rendered_content anonymous_usage_statistics

AttributeError: 'FileDataContext' object has no attribute 'sources'

ImportError: cannot import name 'BaseDataContext' from 'great_expectations.data_context' (c:\Users\46722\Desktop\python-bank-projekt\venv311\Lib\site-packages\great_expectations\data_context\__init__.py)

SyntaxError: invalid syntax (246434187.py, line 2)

TypeError: DataContextConfig.__init__() got an unexpected keyword argument 'anonymous_usage_statistics'

✅ تم إنشاء مشروع Great Expectations بنجاح!


Error while processing DataContextConfig: datasources notebooks validations_store_name evaluation_parameter_store_name include_rendered_content anonymous_usage_statistics
Encountered errors during loading config.  See ValidationError for more details.


InvalidDataContextConfigError: Error while processing DataContextConfig: datasources notebooks validations_store_name evaluation_parameter_store_name include_rendered_content anonymous_usage_statistics

✅ تم إنشاء السياق بنجاح!




ValueError: Either datasource or kwargs are required

AttributeError: 'FileDataContext' object has no attribute 'sources'



AttributeError: 'dict' object has no attribute 'datasource_name'



AttributeError: 'DataFrameAsset' object has no attribute 'build_batch'

1.5.3


    - No action was taken.
            c:\Users\46722\Desktop\python-bank-projekt\gx_project\uncommitted. - No action was taken.


TypeError: DataFrameAsset.get_batch() got an unexpected keyword argument 'dataframe'

ValidationError: 2 validation errors for BatchRequest
batch_identifiers
  extra fields not permitted (type=value_error.extra)
runtime_parameters
  extra fields not permitted (type=value_error.extra)

AttributeError: 'DataFrameAsset' object has no attribute 'get_batch_from_dataframe'

ModuleNotFoundError: No module named 'great_expectations.experimental.datasources'

✅ تم إنشاء السياق بنجاح!


DataContextError: Can not write the fluent datasource my_pandas_ds because a datasource of that name already exists in the data context.

TypeError: 'DataSourceManager' object is not subscriptable



KeyError: 'my_pandas_ds'

    - No action was taken.
            c:\Users\46722\Desktop\python-bank-projekt\gx_project\uncommitted. - No action was taken.


TypeError: 'DataSourceManager' object is not subscriptable

    - No action was taken.
            c:\Users\46722\Desktop\python-bank-projekt\gx_project\uncommitted. - No action was taken.


ValueError: "transactions_asset" already exists (all existing assets are transactions_asset)

TypeError: DataFrameAsset.build_batch_request() got an unexpected keyword argument 'dataframe'

    - No action was taken.
            c:\Users\46722\Desktop\python-bank-projekt\gx_project\uncommitted. - No action was taken.


BuildBatchRequestError: Bad input to build_batch_request: options must contain exactly 1 key, 'dataframe'.

    - No action was taken.
            c:\Users\46722\Desktop\python-bank-projekt\gx_project\uncommitted. - No action was taken.


AttributeError: 'FileDataContext' object has no attribute 'add_or_update_expectation_suite'

    - No action was taken.
            c:\Users\46722\Desktop\python-bank-projekt\gx_project\uncommitted. - No action was taken.


Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

✅ تم حفظ التوقعات بنجاح! 🎉


Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]

🔍 تم التحقق من البيانات بنجاح!
عدد التوقعات التي تم تنفيذها: 1


📄 افتح التقرير من هنا:
file://c:\Users\46722\Desktop\python-bank-projekt\gx_project\uncommitted/data_docs/local_site/index.html


In [1]:
import great_expectations as gx



    - No action was taken.
            c:\Users\46722\Desktop\python-bank-projekt\gx_project\uncommitted. - No action was taken.


AttributeError: 'FileDataContext' object has no attribute 'add_or_update_checkpoint'

ImportError: cannot import name 'SimpleCheckpoint' from 'great_expectations.checkpoint' (c:\Users\46722\Desktop\python-bank-projekt\venv311\Lib\site-packages\great_expectations\checkpoint\__init__.py)

    - No action was taken.
            c:\Users\46722\Desktop\python-bank-projekt\gx_project\uncommitted. - No action was taken.
