In [1]:
# !pip install neo4j
# !pip install graphdatascience

In [1]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import ChatPromptTemplate
from langchain.chains.llm import LLMChain
from langchain.schema import AIMessage, HumanMessage, SystemMessage
import os
from graphdatascience import GraphDataScience
import pandas as pd
import time
import warnings
warnings.filterwarnings("ignore")
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

In [2]:
%%time
from langchain_community.chat_models import ChatOllama

llm = ChatOllama(temperature=0, model = "llama3")

CPU times: total: 109 ms
Wall time: 129 ms


In [3]:
# Load the source records; bytes that cannot be decoded are replaced instead
# of aborting the read.
df_repo = pd.read_csv(
    "crime_lang_chain_1.csv",
    encoding_errors="replace",
)

In [4]:
# Display the biography column; this is the text that the extraction loop
# further down sends to the model, one row at a time.
# df_repo["V_BIOGRAPHY_repo"]
df_repo["V_BIOGRAPHY"]

0     Oct 2022 - sentenced by Sampit District Court ...
1     Jun 2022 - sentenced by Denpasar District Cour...
2     Dec 2017 - sentenced by Hanoi Municipal People...
3     Nov 2022 - reportedly arrested by Counter Terr...
4     May 2014 - reportedly sentenced to 1 year and ...
5     Dec 2010 - sentenced by Tangshan Municipal Int...
6     Dec 2007 - sentenced by Hanoi Municipal People...
7     May 2016 - registration to operate as an insur...
8     Jan 2023 - First Information Report filed by C...
9     Jan 2008 - declared as proclaimed offender by ...
10    Oct 2019 - reportedly sentenced by Suining Cou...
11    Apr 2022 - reportedly sentenced to 3 years imp...
12    Jun 2019 - sentenced to 4 years and 3 months i...
13    May 2023 - held responsible for company's viol...
14    Apr 2007 - reportedly arrested following the s...
15    Sep 2020 - sentenced by Deyang Municipal Inter...
16    Sep 2020 - sentenced to 2 years and 4 months i...
17    Nov 2016 - released from custody by Zhengn

In [83]:
# System message for the crime/date extraction prompt.
# NOTE(review): the prompt asks for JSON, but the format example below has no
# enclosing braces and no comma between the two fields, so the example itself
# is not valid JSON; the replies recorded in this notebook are a mix of braced
# and unbraced text.
# NOTE(review): this string is passed to ChatPromptTemplate as template source;
# presumably any literal braces added here would have to be doubled ({{ }}) so
# they are not read as template variables - confirm before editing the example.
system_prompt = """
identify the main points in the biography provided and return them in json format. identify the crime committed by the person,the date of crime and if in case crime date is not
availabe please provide date closest to crime happened.Provide output in following format with no additional text. 
"Crime": "value"
"Date": "value"
"""

In [84]:
# Two-message chat prompt: the fixed system instructions followed by the
# biography, which is supplied at run time through the `user_input` variable.
prompt_template_entity = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{user_input}"),
    ]
)

In [85]:
# Same prompt definition as the preceding cell: system instructions first,
# then the human message filled from the `user_input` variable.
prompt_template_entity = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{user_input}"),
    ]
)

In [86]:
# One sample biography used to try the chain on a single input.
text = (
    "Aug 2022 - reportedly sentenced by Da Teh District People's Court to "
    "9 months imprisonment, suspended with a probation period of 18 months, "
    "for illegal gambling (VND79.6m)."
)

print(text)

Aug 2022 - reportedly sentenced by Da Teh District People's Court to 9 months imprisonment, suspended with a probation period of 18 months, for illegal gambling (VND79.6m).


In [87]:
%%time
t1=time.time()
llm_chain = LLMChain(llm=llm, prompt=prompt_template_entity, verbose=False)

output = llm_chain.run(user_input= text)
t2=time.time()

CPU times: total: 0 ns
Wall time: 13.6 s


In [88]:
# Elapsed wall-clock seconds between the two time.time() readings taken
# around the sample extraction.
print(t2-t1)

13.55068826675415


In [89]:
# Raw model reply for the sample biography.
print(output)

{
"Crime": "Illegal Gambling",
"Date": "August 2022"
}


In [90]:
# Describe the two fields a parsed reply is expected to contain.
crime_schema = ResponseSchema(
    name="Crime",
    description="This refers to crime entity present in given text.",
)
date_schema = ResponseSchema(
    name="Date",
    description="This is refers to date present in given text.",
)
# Further fields ("Within 7 years", "More than 7 years") are not enabled.
response_schema = [crime_schema, date_schema]

# Build the structured parser from the schemas and print the formatting
# instructions it generates. The parser is not applied to any reply here.
output_parser = StructuredOutputParser.from_response_schemas(response_schema)
format_instructions = output_parser.get_format_instructions()
print(format_instructions)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"Crime": string  // This refers to crime entity present in given text.
	"Date": string  // This is refers to date present in given text.
}
```


In [91]:
# (rows, columns) of the loaded frame.
df_repo.shape

(40, 17)

In [92]:
# Create the two result columns as empty strings: one for the per-row call
# duration and one for the raw model reply. Columns for parsed fields
# (crime_entity, date_entity, ...) are not created.
for placeholder_col in ("time_of_entity_extr", "output"):
    df_repo[placeholder_col] = ""

In [93]:

# Run the extraction chain over every biography and record, per row, the raw
# model reply and the wall-clock seconds the call took.
texts = df_repo["V_BIOGRAPHY"]

# The prompt and the chain do not depend on the row being processed, so they
# are built once rather than on every iteration.
prompt_template_entity = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{user_input}")]
)
llm_chain = LLMChain(llm=llm, prompt=prompt_template_entity, verbose=True)

outputs = []    # raw reply per biography, in row order
durations = []  # seconds spent in llm_chain.run per biography, in row order
for text in texts:
    print(text)
    start = time.time()
    output = llm_chain.run(user_input=text)
    end = time.time()
    print(output)
    outputs.append(output)
    durations.append(end - start)

# Assign whole columns in one step. Writing row by row through
# `df_repo[col].loc[i] = value` is chained assignment: the write goes to an
# intermediate Series that pandas does not guarantee to propagate back to
# df_repo (and never does with Copy-on-Write enabled). Assigning the lists
# also gives the timing column a float dtype from the start and does not
# rely on the row labels being 0..n-1.
df_repo["output"] = outputs
df_repo["time_of_entity_extr"] = durations

# NOTE(review): replies are stored unparsed. output_parser.parse(...) is not
# applied here, presumably because the recorded replies are not consistently
# wrapped in braces - confirm before enabling parsing.

Oct 2022 - sentenced by Sampit District Court to 5 years imprisonment and fined IDR1b in lieu of additional 3 months imprisonment for methamphetamine trafficking (Feb - 2021).


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: 
identify the main points in the biography provided and return them in json format. identify the crime committed by the person,the date of crime and if in case crime date is not
availabe please provide date closest to crime happened.Provide output in following format with no additional text. 
"Crime": "value"
"Date": "value"

Human: Oct 2022 - sentenced by Sampit District Court to 5 years imprisonment and fined IDR1b in lieu of additional 3 months imprisonment for methamphetamine trafficking (Feb - 2021).[0m

[1m> Finished chain.[0m
{
"Crime": "Methamphetamine trafficking",
"Date": "February 2021"
}
Jun 2022 - sentenced by Denpasar District Court to 1 month and 10 days imprisonment and fined IDR500k in lieu of additional 1

In [94]:
# Rebuild the structured parser from response_schema and print its format
# instructions again; these three statements repeat the end of the earlier
# schema cell.
output_parser = StructuredOutputParser.from_response_schemas(response_schema)
format_instructions = output_parser.get_format_instructions()
print(format_instructions)


The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"Crime": string  // This refers to crime entity present in given text.
	"Date": string  // This is refers to date present in given text.
}
```


In [95]:
# Inspect the raw model replies stored in the "output" column.
df_repo["output"]

0                                                                                                                                                                              {\n"Crime": "Methamphetamine trafficking",\n"Date": "February 2021"\n}
1                                                                                                                                                                                             "Crime": "wildlife trafficking",\n"Date": "August 2021"
2                                                                                                                                                           {\n"Crime": "Illegal possession with intent to distribute heroin",\n"Date": "Dec 2017"\n}
3                                                                                                                                                                                     "Crime": "terrorism related activities"\n"Date": "Nov 20, 2022"
4               

In [97]:
# df_repo["output_2"]=df_repo["output"].apply(lambda x: output_parser.parse(x))

In [99]:
# Stop pandas from truncating long cell text when frames are displayed, then
# write the biography, raw reply and timing columns to CSV (the row index is
# written as the first, unnamed column).
pd.set_option("display.max_colwidth", None)
export_columns = ["V_BIOGRAPHY", "output", "time_of_entity_extr"]
df_repo[export_columns].to_csv("entity_extracted_2.csv")

In [100]:
# Display the biography, raw reply and timing columns side by side.
df_repo[["V_BIOGRAPHY","output","time_of_entity_extr"]]

Unnamed: 0,V_BIOGRAPHY,output,time_of_entity_extr
0,Oct 2022 - sentenced by Sampit District Court to 5 years imprisonment and fined IDR1b in lieu of additional 3 months imprisonment for methamphetamine trafficking (Feb - 2021).,"{\n""Crime"": ""Methamphetamine trafficking"",\n""Date"": ""February 2021""\n}",11.212614
1,Jun 2022 - sentenced by Denpasar District Court to 1 month and 10 days imprisonment and fined IDR500k in lieu of additional 1 month of imprisonment for wildlife trafficking - (Aug 2021 - oct 2021),"""Crime"": ""wildlife trafficking"",\n""Date"": ""August 2021""",12.008065
2,Dec 2017 - sentenced by Hanoi Municipal People's Court to 3 years and 6 months imprisonment for illegal possession with intent to distribute heroin (3.5kg). Jan 2023 - no further information reported.,"{\n""Crime"": ""Illegal possession with intent to distribute heroin"",\n""Date"": ""Dec 2017""\n}",12.889995
3,"Nov 2022 - reportedly arrested by Counter Terrorism Department (CTD) for involvement in terrorism related activities.[NONCONVICTION TERROR CATEGORY NOTICE] This category includes information about individuals who are reportedly being investigated for, or have been arrested or charged on suspicion of, involvement in terrorism or terror related activities. The category also includes individuals or entities identified on national or internationally recognised banning, warning or wanted lists as allegedly connected to terrorism or individuals who are reportedly connected to an organisation included on any national or international terrorism list. Inclusion in this category does not mean that an individual or entity is a terrorist or terrorist organisation or that they are involved in or connected to terrorism or terror related activity. Individuals included in this category have not been convicted of any terror related activity; you should review the content carefully and in accordance with our terms and conditions, further enquiries should be made of the report subject to investigate the outcome of any alleged investigation, arrest, charges or any reported connection to any terror related activity and whether such allegations are denied.","""Crime"": ""terrorism related activities""\n""Date"": ""Nov 20, 2022""",68.290709
4,May 2014 - reportedly sentenced to 1 year and 2 months imprisonment for embezzlement.,"{\n""Crime"": ""Embezzlement"",\n""Date"": ""May 2014""\n}",17.69673
5,Dec 2010 - sentenced by Tangshan Municipal Intermediate Court to death with 2 years suspension for participating in an organised crime group and intentional homicide. Deprived of political rights for life. May 2019 - no further information reported.,"""Crime"": ""intentional homicide"",\n""Date"": ""Dec 20, 2010""",21.037725
6,Dec 2007 - sentenced by Hanoi Municipal People's Court to 16 years imprisonment for heroin trafficking. Nov 2018 - no further information reported.,"{\n""Crime"": ""heroin trafficking"",\n""Date"": ""Dec 2007""\n}",9.887909
7,May 2016 - registration to operate as an insurance agent suspended by the Insurance Regulatory and Development Authority (IRDA). Sep 2021 - no further information reported.,"{\n""Crime"": ""Registration fraud"",\n""Date"": ""May 2016""\n}",10.238992
8,Jan 2023 - First Information Report filed by Central Bureau of Investigation for bank fraud (INR258.9m).,"{\n""Crime"": ""bank fraud"",\n""Date"": ""2023-01-20""\n}",9.481492
9,Jan 2008 - declared as proclaimed offender by Karachi Court for alleged involvement in criminal breach of trust by property. May 2022 - no further information reported.,"{\n""Crime"": ""Criminal breach of trust"",\n""Date"": ""May 20, 2022""\n}",11.655088
