# Model Inference

## First instance

In [None]:
from peft import PeftModel
from transformers import AutoTokenizer, AutoModel
import torch

# All tensors/models in this cell are placed on device ordinal 0.
device = torch.device(0)

# Locations of the base ChatGLM-6B checkpoint and of the LoRA adapter weights.
model_path = "/home/user/imported_models/chatglm-6b-20230419"
lora_checkpoint = "/home/user/data/update_weights4/checkpoint-10000"

# The same prompt is sent to the base model and to the LoRA-adapted model.
prompt = "Please provide a description of the data in the following financial domain\n total amount transfered: 169675.51 effect: 2.93%Number of transactions in case: 1 effect: 15.44%\nNumberLinkedCustomers: 3 effect: 19.06%\nCASHDEPOCASHDEPO2: 0.00 effect: -5.97%TRANSACTCRBOUTGO1: 0.00 effect: 2.64%TRANSACTCRBOUTGO2: 0.00 effect: -4.81%TRANSACTCRBINCOM1: 1.00 effect: -4.59%cust_risk_score: medium effect: -3.53%\nbcnf_type_Name: natural effect: 17.14%\nCustRiskCountryId: United Kingdom effect: 2.83%\nprediction: 1 probability: 68.11%"

# Loading pretrained model chatglm-6B (cast to half precision, then moved to `device`)
model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
model = model.half().to(device)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Each call starts from an empty history so the two answers are independent.
response, history = model.chat(tokenizer, prompt, history=[])
print(f"Before tuning:\n {response}\n")

# Apply LoRa on the pretrained model
model = PeftModel.from_pretrained(model, lora_checkpoint).half()
response, history = model.chat(tokenizer, prompt, history=[])
print(f"After tuning:\n {response}")

Loading checkpoint shards: 100%|██████████| 8/8 [00:12<00:00,  1.62s/it]


Before tuning:
 The data provides information about a company's financial performance, including the total amount of money being transferred, the number of transactions in a case, the number of linked customers, the value of the CASHDEPOCASHDEPO2 account, the value of the TRANSACTCRBOUTGO1 account, the value of the TRANSACTCRBOUTGO2 account, the value of the TRANSACTCRbinCOM1 account, the cust_risk_score, the bcnf_type_Name, and the CustRiskCountryId.

The effect of the number of transactions in case is 15.44%, which means that the company's financial performance has been affected by the number of transactions in a case.

The effect of the numberLinkedCustomers is 19.06%, which means that the company's financial performance has been affected by the number of linked customers.

The value of the CASHDEPOCASHDEPO2 account is 0.00, which means that the company's financial performance has not been affected by this account.

The value of the TRANSACTCRBOUTGO1 account is 0.00, which means tha

## Second instance

In [None]:
from peft import PeftModel
from transformers import AutoTokenizer, AutoModel
import torch

device = torch.device(0)
model_path = "/home/user/imported_models/chatglm-6b-20230419"

# Single prompt reused for the "before" and the "after" chat call below.
prompt = "Please provide a description of the data in the following financial domain\n total amount in transactions: 18450.00 effect: 6.03%Number of transactions in case: 1 effect: 12.40%\nNumberLinkedCustomers: 3 effect: 14.18%\nCASHDEPOCASHDEPO2: 96143.71 effect: 3.51%TRANSACTCRBOUTGO1: 0.00 effect: 4.33%TRANSACTCRBINCOM1: 211.58 effect: 2.63%scenario: dbs_ctf_02 effect: 7.17%\ncust_risk_score: low effect: 4.91%\nbcnf_type_Name: natural effect: 17.80%\nCustDomcCountryId: Denmark effect: 4.45%\nprediction: 1 probability: 97.45%"

# Loading pretrained model chatglm-6B
base_model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
model = base_model.half().to(device)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Answer from the base model, asked with an empty conversation history.
response, history = model.chat(
    tokenizer,
    prompt,
    history=[],
)
print(f"Before tuning:\n {response}\n")

# Apply LoRa on the pretrained model
model = PeftModel.from_pretrained(
    model,
    "/home/user/data/update_weights4/checkpoint-10000",
).half()

# Answer from the LoRA-adapted model for the very same prompt.
response, history = model.chat(
    tokenizer,
    prompt,
    history=[],
)
print(f"After tuning:\n {response}")

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 8/8 [00:11<00:00,  1.39s/it]
The dtype of attention mask (torch.int64) is not bool


Before tuning:
 The data describes a financial situation where a total amount in transactions of 40000.00 was increased by 10.72% due to a total amount received of 40000.00 being increased by 2.57%. The average of the total amount transfered was 0.00 and the standard deviation of the total amount received was 8.47%. The number of transactions in the case was 2 and the numberLinkedCustomers was 1. The Scenario was KYC_dap_01 and the cust_risk_score was low. The Bcnf_type_Name was natural. This information is useful for understanding the impact of various factors on the financial performance of a company.

After tuning:
 total amount in transactions is 40000.00 with the effect of 10.72%, indicating a significant positive contribution.  total amount received is 40000.00 with the effect of 2.57%, indicating a moderate positive impact. average  of total amount transfered is 0.00 with the effect of -2.20%, indicating a moderate negative impact. in 2 transaction with the effect of 6.08%, indi

## Third instance

In [None]:
from peft import PeftModel
from transformers import AutoTokenizer, AutoModel
import torch

device = torch.device(0)

# Checkpoint locations: base ChatGLM-6B and the LoRA adapter applied afterwards.
model_path = "/home/user/imported_models/chatglm-6b-20230419"
adapter_path = "/home/user/data/update_weights4/checkpoint-10000"

# Prompt text shared by both chat calls in this cell.
prompt = "Please provide a description of the data in the following financial domain\n total amount transferred: 169675.51 effect: 2.93%Number of transactions in case: 1 effect: 15.44%\nNumberLinkedCustomers: 3 effect: 19.06%\nCASHDEPOCASHDEPO2: 0.00 effect: -5.97%TRANSACTCRBOUTGO1: 0.00 effect: 2.64%TRANSACTCRBOUTGO2: 0.00 effect: -4.81%TRANSACTCRBINCOM1: 1.00 effect: -4.59%cust_risk_score: medium effect: -3.53%\nbcnf_type_Name: natural effect: 17.14%\nCustRiskCountryId: United Kingdom effect: 2.83%\nprediction: 1 probability: 68.11%"

# Loading pretrained model chatglm-6B
model = AutoModel.from_pretrained(model_path, trust_remote_code=True).half()
model = model.to(device)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# First pass: base model only.
response, history = model.chat(tokenizer, prompt, history=[])
print(f"Before tuning:\n {response}\n")

# Apply LoRa on the pretrained model
model = PeftModel.from_pretrained(model, adapter_path).half()

# Second pass: same prompt, fresh history, adapter applied.
response, history = model.chat(tokenizer, prompt, history=[])
print(f"After tuning:\n {response}")

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 8/8 [01:34<00:00, 11.77s/it]
The dtype of attention mask (torch.int64) is not bool


Before tuning:
 The data in the provided financial domain describes a company's financial performance. The total amount transferred is the total value of all transactions made by the company, and the effect of this variable is 2.93%. The number of transactions in the case is 1, which means that there were only 1 transactions made by the company. The effect of this variable is 15.44%.

The variable "NumberLinkedCustomers" shows the number of customers who are linked to the company. The effect of this variable is 19.06%. The variable "CASHDEPOCASHDEPO2" is a hold-over variable from a previous question, it doesn't have a direct impact on the company's financial performance.

The variable "TransACTCRBOUTGO1" is a hold-over variable from a previous question, it doesn't have a direct impact on the company's financial performance. The variable "TransACTCRBOUTGO2" is a hold-over variable from a previous question, it doesn't have a direct impact on the company's financial performance. The varia

## Fourth instance

In [None]:
from peft import PeftModel
from transformers import AutoTokenizer, AutoModel
import torch

device = torch.device(0)

# Checkpoint locations: base ChatGLM-6B and the LoRA adapter applied afterwards.
model_path = "/home/user/imported_models/chatglm-6b-20230419"
lora_checkpoint = "/home/user/data/update_weights4/checkpoint-10000"

# One prompt for BOTH chat calls. Previously the "before" call used
# "... in english \n ..." (trailing space) while the "after" call used
# "... in english\n ...", so the two models were not given identical input.
# The variant without the trailing space is kept; it is the one the tuned
# model was already being asked.
prompt = "Please provide a description of the data in the following financial domain and list all variables in english\n total amount transferred: 169675.51 effect: 2.93%Number of transactions in case: 1 effect: 15.44%\nNumberLinkedCustomers: 3 effect: 19.06%\nCASHDEPOCASHDEPO2: 0.00 effect: -5.97%TRANSACTCRBOUTGO1: 0.00 effect: 2.64%TRANSACTCRBOUTGO2: 0.00 effect: -4.81%TRANSACTCRBINCOM1: 1.00 effect: -4.59%cust_risk_score: medium effect: -3.53%\nbcnf_type_Name: natural effect: 17.14%\nCustRiskCountryId: United Kingdom effect: 2.83%\nprediction: 1 probability: 68.11%"

# Loading pretrained model chatglm-6B (half precision, moved to `device`)
model = AutoModel.from_pretrained(model_path, trust_remote_code=True).half().to(device)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Base model answer; history is empty so the call is a fresh conversation.
response, history = model.chat(tokenizer, prompt, history=[])
print(f"Before tuning:\n {response}\n")

# Apply LoRa on the pretrained model
model = PeftModel.from_pretrained(model, lora_checkpoint).half()

# LoRA-adapted answer to the identical prompt.
response, history = model.chat(tokenizer, prompt, history=[])
print(f"After tuning:\n {response}")

Loading checkpoint shards: 100%|██████████| 8/8 [00:12<00:00,  1.53s/it]


Before tuning:
 The data can be describe as follows:

-   Total amount transferred: 169675.51
-   Number of transactions in case: 1
-   NumberLinkedCustomers: 3
-   CASHDEPOCASHDEPO2: 0.00
-   TRANSACTCRBOUTGO1: 0.00
-   TRANSACTCRBOUTGO2: 0.00
-   TRANSACTCRBINCOM1: 1.00
-   cust_risk_score: medium
-   Bcnf_type_Name: natural
-   CustRiskCountryId: United Kingdom
-   prediction: 1
-   probability: 68.11%

The variables are all related to financial information, specifically to the case, where a customer has made a transfer. The amount transferred, the number of transactions, the number of customers linked to the case, the CASHDEPOCASHDEPO2 variable, the TRANSACTCRBOUTGO1 and TRANSACTCRBOUTGO2 variables, and the TRANSACTCRBINCOM1 variable are all measures of the performance of the case. The cust_risk_score variable is a measure of customer risk, and the bcnf_type_Name variable is a measure of the type of financial risk that is being considered. CustRiskCountryId is a measure of country 

## Fifth instance

In [None]:
from peft import PeftModel
from transformers import AutoTokenizer, AutoModel
import torch

device = torch.device(0)

# Checkpoint locations: base ChatGLM-6B and the LoRA adapter applied afterwards.
model_path = "/home/user/imported_models/chatglm-6b-20230419"
lora_checkpoint = "/home/user/data/update_weights4/checkpoint-10000"

# One prompt for BOTH chat calls. The two original literals differed by a
# space before the first "\n" ("english \n" vs "english\n"), which made the
# before/after comparison use different inputs. The variant without the
# trailing space (the one sent to the tuned model) is used for both.
prompt = "Please provide a description of the data in the following financial domain and list all variables in english\n total amount transferred: 169675.51 effect: 2.93%Number of transactions in case: 1 effect: 15.44%\nNumberLinkedCustomers: 3 effect: 19.06%\nCASHDEPOCASHDEPO2: 0.00 effect: -5.97%TRANSACTCRBOUTGO1: 0.00 effect: 2.64%TRANSACTCRBOUTGO2: 0.00 effect: -4.81%TRANSACTCRBINCOM1: 1.00 effect: -4.59%cust_risk_score: medium effect: -3.53%\nbcnf_type_Name: natural effect: 17.14%"

# Loading pretrained model chatglm-6B (half precision, moved to `device`)
model = AutoModel.from_pretrained(model_path, trust_remote_code=True).half().to(device)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Base model answer; history is empty so the call is a fresh conversation.
response, history = model.chat(tokenizer, prompt, history=[])
print(f"Before tuning:\n {response}\n")

# Apply LoRa on the pretrained model
model = PeftModel.from_pretrained(model, lora_checkpoint).half()

# LoRA-adapted answer to the identical prompt.
response, history = model.chat(tokenizer, prompt, history=[])
print(f"After tuning:\n {response}")

Loading checkpoint shards: 100%|██████████| 8/8 [00:12<00:00,  1.54s/it]


Before tuning:
 The data in the following financial domain describes a company's financial performance. The variables are as follows:

- total amount transferred: The total amount of money that has been transferred between the company's customers.
-Number of transactions in case: The number of transactions that have occurred in a specific case (e.g. a deal between two customers).
-NumberLinkedCustomers: The number of customers who are linked to the company (e.g. if a customer has a business relationship with another customer, that customer is considered to be linked).
-CASHDEPOCASHDEPO2: The second value in the variable "CASHDEPOCASHDEPO" is a number, which indicates the amount of money that has been transferred between the company's customers in the previous month.
-TRANSACTCRBOUTGO1: The first value in the variable "TRANSACTCRBOUTGO" is a number, which indicates the total amount of money that has been transfered in a specific month (e.g. in the previous month).
-TRANSACTCRBOUTGO2: Th

In [None]:
from peft import PeftModel
from transformers import AutoTokenizer, AutoModel
import torch
import json
from tqdm.notebook import tqdm

def load_json_file(file_path):
    """Load a JSON Lines file: one JSON document per line.

    Args:
        file_path: Path of the file to read.

    Returns:
        list: The parsed value of every non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON text is read as UTF-8 rather than the platform default encoding.
    with open(file_path, "r", encoding="utf-8") as json_file:
        # Blank lines (for example an empty last line) are skipped;
        # json.loads("") would raise instead of yielding a record.
        return [json.loads(line) for line in json_file if line.strip()]

def save_json_file(data, file_path):
    """Write every element of ``data`` to ``file_path`` as one JSON document per line.

    Args:
        data: Iterable of JSON-serialisable objects.
        file_path: Destination path; the file is opened in write mode, so
            existing content is replaced.
    """
    with open(file_path, "w") as json_file:
        for record in data:
            # Serialise first, then terminate the line.
            serialised = json.dumps(record)
            json_file.write(serialised)
            json_file.write("\n")
device = torch.device(0)

# Load the JSON file (each line is parsed as one record by load_json_file)
json_file_path = "/home/user/data/test.json"
data = load_json_file(json_file_path)

# Initialize the model: base checkpoint in half precision on `device`
model_path = "/home/user/imported_models/chatglm-6b-20230419"
model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
model = model.half().to(device)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Apply LoRa on the pretrained model
lora_checkpoint = "/home/user/data/update_weights4/checkpoint-10000"
model = PeftModel.from_pretrained(model, lora_checkpoint).half()

# Ask the model every question and collect question/answer pairs.
results = []
for record in tqdm(data, desc="Processing"):
    # A record without a "q" key yields an empty question string.
    question = record.get("q", "")
    # Every question is asked with an empty history, i.e. independently.
    reply = model.chat(tokenizer, question, history=[])
    # Only the first element of the chat result is stored as the answer.
    results.append(dict(q=question, a=reply[0]))

# Save the results as a new JSON file
output_file_path = "/home/user/data/test_result.json"
save_json_file(results, output_file_path)


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Processing:   0%|          | 0/10 [00:00<?, ?it/s]

The dtype of attention mask (torch.int64) is not bool


# ROUGE evaluation

In [None]:
from peft import PeftModel
from transformers import AutoTokenizer, AutoModel
import torch
import json
from tqdm.notebook import tqdm

def load_json_file(file_path):
    """Read a file that holds one JSON document per line.

    Args:
        file_path: Path of the file to read.

    Returns:
        list: One parsed value per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = []
    # Decode explicitly as UTF-8 instead of relying on the platform default.
    with open(file_path, "r", encoding="utf-8") as json_file:
        for line in json_file:
            stripped = line.strip()
            if not stripped:
                # An empty line carries no record; parsing it would raise.
                continue
            data.append(json.loads(stripped))
    return data

def save_json_file(data, file_path):
    """Dump ``data`` to ``file_path`` in JSON Lines form.

    Args:
        data: Iterable of JSON-serialisable objects; each becomes one line.
        file_path: Destination path, opened in write mode (content is replaced).
    """
    with open(file_path, "w") as json_file:
        # Each item is serialised and followed by a newline, in input order.
        json_file.writelines(json.dumps(item) + "\n" for item in data)
device = torch.device(0)

# Input questions and output location for the generated answers.
json_file_path = "/home/user/data/test.json"
output_file_path = "/home/user/data/test_result.json"

# Load the JSON file
data = load_json_file(json_file_path)

# Initialize the model
model_path = "/home/user/imported_models/chatglm-6b-20230419"
model = AutoModel.from_pretrained(model_path, trust_remote_code=True).half()
model = model.to(device)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
# Apply LoRa on the pretrained model
model = PeftModel.from_pretrained(
    model, "/home/user/data/update_weights4/checkpoint-10000"
).half()

# Process each item in the JSON data and save the results
results = []
for entry in tqdm(data, desc="Processing"):
    # Falls back to an empty string when the entry has no "q" key.
    question = entry.get("q", "")
    chat_output = model.chat(tokenizer, question, history=[])
    # The answer stored is the first element of what chat() returned.
    answer = chat_output[0]
    results.append({"q": question, "a": answer})

# Save the results as a new JSON file
save_json_file(results, output_file_path)

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Processing:   0%|          | 0/1000 [00:00<?, ?it/s]

In [None]:
# Show the first question/answer record collected in `results`.
results[0]

{'q': 'Please provide a description of the data in the following financial domain\n total amount transfered: 169675.51 effect: 2.93%Number of transactions in case: 1 effect: 15.44%\nNumberLinkedCustomers: 3 effect: 19.06%\nCASHDEPOCASHDEPO2: 0.00 effect: -5.97%TRANSACTCRBOUTGO1: 0.00 effect: 2.64%TRANSACTCRBOUTGO2: 0.00 effect: -4.81%TRANSACTCRBINCOM1: 1.00 effect: -4.59%cust_risk_score: medium effect: -3.53%\nbcnf_type_Name: natural effect: 17.14%\nCustRiskCountryId: United Kingdom effect: 2.83%\nprediction: 1 probability: 68.11%',
 'a': 'total amount transfered is 169675.51 with the effect of 2.93%, indicating a moderate positive impact. in 1 transaction with the effect of 15.44%, indicating a significant positive contribution. Moreover, there is only 3 linked customer with the effect of 19.06%, indicating a significant positive contribution. The customer has made a cash deposit with an amount of 0.00 with the effect of -5.97%, indicating a moderate negative impact. Additionally, the

In [None]:
# Show only the answer text ("a") of the first record; "" if the key is absent.
results[0].get("a", "")

'total amount transfered is 169675.51 with the effect of 2.93%, indicating a moderate positive impact. in 1 transaction with the effect of 15.44%, indicating a significant positive contribution. Moreover, there is only 3 linked customer with the effect of 19.06%, indicating a significant positive contribution. The customer has made a cash deposit with an amount of 0.00 with the effect of -5.97%, indicating a moderate negative impact. Additionally, the outgoing cross-border transfers amount(01) is 0.00 with the effect of 2.64%, indicating a moderate positive impact. Additionally, the outgoing cross-border transfers amount(02) is 0.00 with the effect of -4.81%, indicating a moderate negative impact. Additionally, the incoming cross-border transfers amount(01) to 1.00 with the effect of -4.59%, indicating a moderate negative impact. The customer has a risk score classified as medium with the effect of -3.53%, indicating a moderate negative impact. and their business card type is considere

#### Load the reference and prediction JSON files and collect the answer for each case

In [None]:
import json
from rouge import Rouge

def load_json_file(file_path):
    """Parse a JSON Lines file into a list of Python objects.

    Args:
        file_path: Path of the file to read.

    Returns:
        list: The decoded value of each non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Read as UTF-8 explicitly so the result does not depend on the locale.
    with open(file_path, "r", encoding="utf-8") as json_file:
        stripped_lines = (line.strip() for line in json_file)
        # Empty lines are dropped: json.loads("") raises rather than
        # returning a record, so a trailing blank line used to abort the load.
        data = [json.loads(text) for text in stripped_lines if text]
    return data

# Reference answers and model predictions, both stored one JSON record per line.
refer_json_file_path = "/home/user/data/test_data_reference.json"
refer = load_json_file(refer_json_file_path)

output_file_path = "/home/user/data/test_result.json"
pred = load_json_file(output_file_path)

# Extract the answer text ("a") from every record; a missing key becomes "".
predictions = [item.get("a", "") for item in pred]

# A comprehension is used so that `refer` keeps pointing at the loaded list.
# The previous loop reassigned `refer` to each answer string while iterating,
# leaving `refer` as a single string once the cell had run.
references = [item.get("a", "") for item in refer]

# Predictions and references are paired by position later on; if the two
# files differ in length, zip() would silently drop the unmatched tail.
if len(predictions) != len(references):
    raise ValueError(
        f"Got {len(predictions)} predictions but {len(references)} references; "
        "they must be aligned one-to-one."
    )


In [None]:
# One scorer instance is reused for every prediction/reference pair.
rouge = Rouge()

# score_list collects the get_scores() result of each pair, in order.
score_list = []
for prediction, reference in zip(predictions, references):
    # The per-pair result stays bound to `scores` so the name remains
    # defined at notebook scope after this cell has run.
    scores = rouge.get_scores(prediction, reference)
    score_list += [scores]

#### Define an average ROUGE calculation function

In [None]:
def cal_avg_rouge(score_list):
    """Average ROUGE-1, ROUGE-2 and ROUGE-L scores over ``score_list``.

    Args:
        score_list: Iterable of score entries. Each entry is either a score
            dict of the form
            ``{"rouge-1": {"f": .., "p": .., "r": ..}, "rouge-2": {...},
            "rouge-l": {...}}`` or a list/tuple of such dicts. The nested
            form is accepted because each entry may itself be an iterable
            of score dicts.

    Returns:
        dict: Same layout as one score dict, holding the mean of every
        value. All values are 0.0 when ``score_list`` has no score dicts.

    Raises:
        KeyError: If a score dict lacks one of the expected keys.
    """
    metrics = ("rouge-1", "rouge-2", "rouge-l")
    stats = ("r", "p", "f")

    # Flatten to a plain list of score dicts so both supported entry shapes
    # are handled by the same accumulation loop.
    flat_scores = []
    for entry in score_list:
        if isinstance(entry, dict):
            flat_scores.append(entry)
        else:
            flat_scores.extend(entry)

    avg_rouge_scores = {m: {"f": 0.0, "p": 0.0, "r": 0.0} for m in metrics}

    # Nothing to average: return zeros instead of dividing by zero.
    if not flat_scores:
        return avg_rouge_scores

    # Accumulate over the ARGUMENT. The earlier version looped over a
    # notebook-level `scores` variable, so only whatever that name last held
    # was summed while the total was still divided by len(score_list).
    for item in flat_scores:
        for m in metrics:
            for s in stats:
                avg_rouge_scores[m][s] += item[m][s]

    for m in metrics:
        for s in stats:
            avg_rouge_scores[m][s] /= len(flat_scores)
    return avg_rouge_scores

In [None]:
# Average the per-pair ROUGE results gathered in `score_list`.
average_rouge_score = cal_avg_rouge(score_list)

#### Print the ROUGE metrics

In [None]:
# Display the averaged ROUGE-1 / ROUGE-2 / ROUGE-L scores.
average_rouge_score

{'rouge-1': {'f': 0.000999999995, 'p': 0.001, 'r': 0.001},
 'rouge-2': {'f': 0.000999999995, 'p': 0.001, 'r': 0.001},
 'rouge-l': {'f': 0.000999999995, 'p': 0.001, 'r': 0.001}}