In [1]:
import os
from groq import Groq
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
import pymysql
from pymysql.cursors import DictCursor

In [2]:
# Credentials are read from the environment so that no secret is stored in
# the notebook file. A missing GROQ_API_KEY fails right here with a KeyError
# instead of later as an authentication error.
groq_api_key = os.environ["GROQ_API_KEY"]
client = Groq(api_key=groq_api_key)
# LangChain settings; each is None when its variable is not set.
langchain_api_key = os.environ.get("LANGCHAIN_API_KEY")
langchain_endpoint = os.environ.get("LANGCHAIN_ENDPOINT")

In [3]:
# Settings for the chat model, gathered in one place so they are easy to scan.
llm_settings = dict(
    model="llama-3.1-70b-versatile",
    temperature=0,
    max_tokens=2048,
    timeout=None,
    max_retries=2,
    groq_api_key=groq_api_key,
)

# Build the ChatGroq client from those settings and confirm it was created.
llm = ChatGroq(**llm_settings)
print("Model retrieved")

Model retrieved


In [4]:
# Path to the sample audio recording to transcribe.
filename = "./audioFiles/sample2.m4a"

In [5]:
# Open the audio file and request a transcription from the Groq client.
try:
    with open(filename, "rb") as file:
        response = client.audio.transcriptions.create(
            file=(filename, file.read()),
            model="whisper-large-v3",
            response_format="verbose_json"
        )
    # Print the type of the response object and its attributes
    print(f"Response type: {type(response)}")
    print("Response attributes:", dir(response))
    transcription_text = response.text
    print(transcription_text)
except Exception as e:
    print(f"An error occurred: {e}")
    # Re-raise after reporting: transcription_text is read unconditionally
    # further down, so carrying on would either hit a NameError (fresh kernel)
    # or silently reuse the transcript left over from a previous run.
    raise

Response type: <class 'groq.types.audio.transcription.Transcription'>
Response attributes: ['__abstractmethods__', '__annotations__', '__class__', '__class_getitem__', '__class_vars__', '__copy__', '__deepcopy__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__fields__', '__fields_set__', '__format__', '__ge__', '__get_pydantic_core_schema__', '__get_pydantic_json_schema__', '__getattr__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__pretty__', '__private_attributes__', '__pydantic_complete__', '__pydantic_custom_init__', '__pydantic_decorators__', '__pydantic_extra__', '__pydantic_fields_set__', '__pydantic_generic_metadata__', '__pydantic_init_subclass__', '__pydantic_parent_namespace__', '__pydantic_post_init__', '__pydantic_private__', '__pydantic_root_model__', '__pydantic_serializer__', '__pydantic_validator__', '__reduce__', '__reduce_ex__', '__repr_

In [6]:
# Structured result produced by the classifier: where the disaster happened,
# what kind it was, and how severe it is.
# NOTE(review): documented with comments rather than a class docstring on
# purpose — a docstring would presumably become part of the schema handed to
# the model; confirm before converting.
class DisasterClassification(BaseModel):
    # City named in the description.
    city_type: str = Field(
        description="Classify the city in which disaster has occured"
    )
    # Indian state named in the description.
    state_type: str = Field(
        description="Classify the state of India in which disaster has occured"
    )
    # Short label for the kind of disaster ('flood', 'earthquake', 'fire', ...).
    disaster_type: str = Field(
        description="Classify the type of disaster. Possible types include 'flood', 'earthquake', 'fire', etc. Return only the disaster type."
    )
    # Severity score; the description asks for 0-3, but nothing here enforces
    # that range.
    severity: int = Field(
        description="Classify the severity of the disaster on a scale from 0 to 3. 0 being no impact and 3 being extremely severe."
    )

# System instructions for the classifier. The text is assembled from explicit
# pieces so the exact whitespace and line breaks sent to the model are visible.
system_prompt = (
    "You are an expert in disaster management. Given the description of a disaster, get the city and state where the disaster has occured, classify it into one of the following types: 'flood', 'earthquake', 'fire', etc. \n"
    "    Additionally, assess the severity of the disaster on a scale from 0 to 3, where 0 indicates no impact and 3 indicates extremely severe. \n"
    "    Provide accurate and detailed classifications based on the given description."
)

# Chat template: the fixed system message followed by the human message that
# carries the description under the {query} placeholder.
prompt_messages = [
    ("system", system_prompt),
    ("human", "Disaster description: {query}"),
]
disaster_prompt = ChatPromptTemplate.from_messages(prompt_messages)

# LLM wrapper configured to return DisasterClassification objects.
structured_llm_grader = llm.with_structured_output(DisasterClassification)

# Full pipeline: prompt template piped into the structured-output LLM.
disaster_classifier = disaster_prompt | structured_llm_grader

In [7]:
# Classify the transcript and unpack the structured result in one step.
query = transcription_text
disaster_info = disaster_classifier.invoke({"query": query})

disaster_type, severity, city, state = (
    disaster_info.disaster_type,
    disaster_info.severity,
    disaster_info.city_type,
    disaster_info.state_type,
)

# One line of output per extracted field.
print(
    f"Disaster Type: {disaster_type}",
    f"Severity: {severity}",
    f"City: {city}",
    f"State: {state}",
    sep="\n",
)

Disaster Type: flood
Severity: 3
City: Panji
State: Goa


In [8]:
# Database connection settings, passed as keyword arguments to pymysql.connect.
# Values come from environment variables so the password is not stored in the
# notebook. Host, user and database fall back to the local development
# defaults; the password falls back to an empty string.
db_config = {
    'host': os.environ.get('SIHDB_HOST', 'localhost'),
    'user': os.environ.get('SIHDB_USER', 'root'),
    'database': os.environ.get('SIHDB_NAME', 'sihdb'),
    'password': os.environ.get('SIHDB_PASSWORD', ''),
}

In [9]:
def get_text_from_audio(filename):
    """Transcribe one audio file and return the transcript text.

    The file is read in full and submitted through the module-level ``client``
    using the "whisper-large-v3" model with a "verbose_json" response.

    Args:
        filename: Path of the audio file to transcribe.

    Returns:
        The ``text`` attribute of the transcription response, or ``None`` if
        any exception occurred (the error is printed, not raised).
    """
    try:
        # Read the bytes first so the file handle is released before the
        # request is made.
        with open(filename, "rb") as audio_handle:
            audio_bytes = audio_handle.read()
        result = client.audio.transcriptions.create(
            model="whisper-large-v3",
            response_format="verbose_json",
            file=(filename, audio_bytes),
        )
        return result.text
    except Exception as e:
        print(f"get_text_from_audio(): An error occurred: {e}")
        return None

def get_disaster_info(transcription_text):
    """Classify a transcript with the module-level ``disaster_classifier``.

    Args:
        transcription_text: Text passed to the classifier as the ``query`` input.

    Returns:
        A 4-tuple ``(disaster_type, severity, city, state)`` read from the
        classifier result's ``disaster_type``, ``severity``, ``city_type`` and
        ``state_type`` attributes.
    """
    result = disaster_classifier.invoke({"query": transcription_text})
    return (
        result.disaster_type,
        result.severity,
        result.city_type,
        result.state_type,
    )

def insert_disaster_info(disaster_type, severity, city, state):
    """Store one classified disaster record in the ``ndrftasks2`` table.

    Connects with the module-level ``db_config``, creates the table if it does
    not exist yet, inserts the row with a parameterized query and commits.

    Args:
        disaster_type: Disaster label stored in the ``disaster_type`` column.
        severity: Integer stored in the ``severity`` column.
        city: Value stored in the ``city`` column.
        state: Value stored in the ``state`` column.

    Returns:
        None. Any exception is printed rather than raised.
    """
    # Start as None so the cleanup below is safe even when connecting fails;
    # otherwise the finally clause would raise UnboundLocalError and hide the
    # error that was just reported.
    connection = None
    try:
        connection = pymysql.connect(**db_config)
        with connection.cursor() as cursor:
            cursor.execute("CREATE TABLE IF NOT EXISTS ndrftasks2 (audio_id INT AUTO_INCREMENT PRIMARY KEY, disaster_type VARCHAR(255), severity INT, city VARCHAR(255), state VARCHAR(255))")
            sql = "INSERT INTO ndrftasks2 (disaster_type, severity, city, state) VALUES (%s, %s, %s, %s)"
            cursor.execute(sql, (disaster_type, severity, city, state))
            connection.commit()
    except Exception as e:
        print(f"insert_disaster_info(): An error occurred: {e}")
    finally:
        if connection is not None:
            connection.close()

def process_audio_file(filename):
    """Transcribe one audio file, classify it and store the result.

    Args:
        filename: Path of the audio file to process.

    Returns:
        None. When transcription fails the file is skipped and nothing is
        classified or written to the database.
    """
    transcription_text = get_text_from_audio(filename)
    # get_text_from_audio returns None on failure; classifying and storing a
    # missing transcript would put a made-up record in the database.
    if transcription_text is None:
        print(f"process_audio_file(): skipping {filename}: no transcription available")
        return
    disaster_type, severity, city, state = get_disaster_info(transcription_text)
    insert_disaster_info(disaster_type, severity, city, state)
    

In [10]:
def process_audio_folder(foldername="./audioFiles"):
    """Process every audio file found directly inside a folder.

    Entries whose name ends in .m4a, .mp3 or .wav (in any letter case) are
    passed to ``process_audio_file`` in sorted name order, so repeated runs
    handle files in the same sequence.

    Args:
        foldername: Directory to scan; subdirectories are not searched.

    Returns:
        None.
    """
    audio_extensions = (".m4a", ".mp3", ".wav")
    # os.listdir gives no ordering guarantee; sort for reproducible runs.
    for filename in sorted(os.listdir(foldername)):
        if filename.lower().endswith(audio_extensions):
            process_audio_file(os.path.join(foldername, filename))

In [11]:
# Transcribe, classify and store every audio file in the default folder.
# Comment out the following line to skip this run.
process_audio_folder()