In [1]:
import pandas as pd
data=pd.read_csv("balanced_dataset3.csv")

In [2]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119028 entries, 0 to 119027
Data columns (total 15 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   User            119028 non-null  int64  
 1   Card            119028 non-null  int64  
 2   Year            119028 non-null  int64  
 3   Month           119028 non-null  int64  
 4   Day             119028 non-null  int64  
 5   Time            119028 non-null  object 
 6   Amount          119028 non-null  object 
 7   Use Chip        119028 non-null  object 
 8   Merchant Name   119028 non-null  int64  
 9   Merchant City   119028 non-null  object 
 10  Merchant State  45613 non-null   object 
 11  Zip             38845 non-null   float64
 12  MCC             119028 non-null  int64  
 13  Errors?         3043 non-null    object 
 14  Is Fraud?       119028 non-null  object 
dtypes: float64(1), int64(7), object(7)
memory usage: 13.6+ MB


In [3]:
data.drop(columns=['User', 'Card', 'Merchant Name', 'Zip', 'Errors?'], inplace=True)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119028 entries, 0 to 119027
Data columns (total 10 columns):
 #   Column          Non-Null Count   Dtype 
---  ------          --------------   ----- 
 0   Year            119028 non-null  int64 
 1   Month           119028 non-null  int64 
 2   Day             119028 non-null  int64 
 3   Time            119028 non-null  object
 4   Amount          119028 non-null  object
 5   Use Chip        119028 non-null  object
 6   Merchant City   119028 non-null  object
 7   Merchant State  45613 non-null   object
 8   MCC             119028 non-null  int64 
 9   Is Fraud?       119028 non-null  object
dtypes: int64(4), object(6)
memory usage: 9.1+ MB


In [4]:
data["description"] = data.apply(
    lambda row: f"On {row['Month']}/{row['Day']}/{row['Year']} at {row['Time']}, "
                f"a transaction of {row['Amount']} was made using {row['Use Chip']} "
                f"in {row['Merchant City']}, {row['Merchant State']}. "
                f"The merchant category code (MCC) was {row['MCC']}. ",
    axis=1
)

In [7]:
lamini_data = [
    {
        "instruction": "Determine if the transaction is fraudulent based on the given details.",
        "input": desc,  # The transaction description you generated earlier
        "output": "Fraudulent" if fraud == 'Yes' else "Not Fraudulent"
    }
    for desc, fraud in zip(data['description'], data['Is Fraud?'])
]
lamini_data[0]

{'instruction': 'Determine if the transaction is fraudulent based on the given details.',
 'input': 'On 2/2/2004 at 09:50, a transaction of $122.08 was made using Online Transaction in  ONLINE, nan. The merchant category code (MCC) was 5712. ',
 'output': 'Fraudulent'}

In [None]:
import os
from lamini import Lamini
from dotenv import load_dotenv
load_dotenv()


# 🔑 Set your API Key
import lamini


lamini.api_key = os.environ["LAMINI_API_KEY"]

# 🔹 Initialize Lamini Fine-Tuner
finetuner = Lamini(model_name="meta-llama/Meta-Llama-3.1-8B-Instruct")

# 📌 Define Fine-Tuning Configuration
config = {
    "finetune_args": {
        "save_steps": 60,        
        "max_steps": 360,         
        "learning_rate": 0.0003,  
        "max_length": 2048,       
        "optim": "adafactor",     
        "eval_steps": 50,         
        "index_max_size": 65536,  
        "index_k": 2,
        "r_value": 32,
        "index_hnsw_m": 32,
        "index_method": "IndexIVFPQ",
        "index_hnsw_efSearch": 8,
        "index_pq_nbits": 8,
        "index_ivf_nprobe": 48,
        "index_hnsw_efConstruction": 16,
        "index_pq_m": 8,
        "index_ivf_nlist": 2048
    },
    "gpu_config": {},  
    "custom_model_name": "LLM_Fraud_Detection"  # Custom model name
}

# 📂 Path to JSONL Training Data
# Ensure the file is correctly formatted

# 🚀 Start Fine-Tuning
finetuner.tune(
    data_or_dataset_id=lamini_data, finetune_args=config["finetune_args"]
)