In [1]:
import os
import librosa
import numpy as np
import scipy.stats

os.environ["OPENAI_API_KEY"] = api_key

In [2]:
audio_file_name = "Phone call with HMRC fraudster in UK 2021.wav"

In [3]:
import os
from openai import OpenAI

client = OpenAI(
    # This is the default and can be omitted
   # api_key=os.environ.get("OPENAI_API_KEY"),
)


def get_gpt_res(ip):
    print("ip for gpt res ",ip)
    print("start of gpt response")
    messages=[
            {"role": "system",
             "content": """As a customer care professional, I want to analyze this audio transcript to detect any
             anomalies or potential fraud"""},
            {"role": "user", "content": ip}]
    chat_completion = client.chat.completions.create(
        messages=messages,
        model="gpt-4o",
    )
    res = chat_completion.choices[0].message.content
    print("end of gpt response")
    print("response got is "+str(res))
    return res

In [4]:
import whisper
import torch
torch.cuda.is_available()

# Load the model from the specified path
model = whisper.load_model("F:/models/small.en.pt",device="cuda")

In [5]:
from pydub import AudioSegment

def extract_audio_segment(file_path,file_name, start_time, end_time):
    
    start_time = start_time*1000
    end_time = end_time*1000
    
    # Load the audio file
    audio = AudioSegment.from_file(file_path+file_name)
    
    # Extract the segment
    extracted_segment = audio[start_time:end_time]

    new_file_name = file_path+f"{start_time}_{end_time}_{file_name}"
    
    # Save the extracted segment
    extracted_segment.export(new_file_name, format="wav")

    return new_file_name

In [6]:
def get_model_op(path):
    print("start of model_op")
    result = model.transcribe(path)
    txt = result["text"]
    print("end of model_op")
    return result,txt

In [7]:
# Dictionary to store the linguistic markers information
linguistic_markers = {
    "Causation": {
        "description": "Providing a certain level of concreteness to an explanation.",
        "words": ["Because", "Effect", "Hence"]
    },
    "Negation": {
        "description": "Avoiding to provide a direct response.",
        "words": ["No", "Not", "Can’t", "Didn’t"]
    },
    "Hedging": {
        "description": "Describes words which meaning implicitly involves fuzziness.",
        "words": ["May be", "I guess", "Sort of"]
    },
    "Qualified assertions": {
        "description": "Unveils questionable actions.",
        "words": ["Needed", "Attempted"]
    },
    "Temporal Lacunae": {
        "description": "Unexplained lapses of time.",
        "words": ["Later that day", "Afterwards"]
    },
    "Overzealous expression": {
        "description": "Expresses some level of uncertainty.",
        "words": ["I swear to God", "Honestly"]
    },
    "Memory loss": {
        "description": "Feigning memory loss.",
        "words": ["I forget", "Can’t remember"]
    },
    "Third person plural pronouns": {
        "description": "Possessive determiners to refer to things or people other than the speaker.",
        "words": ["They", "Them", "Theirs"]
    },
    "Pronouns": {
        "description": "Possessive determiners to refer to the speaker by overemphasising their physical presence.",
        "words": ["I", "Me", "Mine"]
    },
    "Negative emotion": {
        "description": "Negative expressions in word choice.",
        "words": ["Afraid", "Sad", "Hate", "Abandon", "Hurt"]
    },
    "Negative sentiment": {
        "description": "Negative emotional effect.",
        "words": ["Abominable", "Anger", "Anxious", "Bad"]
    },
    "Positive emotion": {
        "description": "Positive expressions in word choice.",
        "words": ["Happy", "Brave", "Love", "Nice", "Sweet"]
    },
    "Positive sentiment": {
        "description": "Positive emotional effect.",
        "words": ["Admire", "Amazing", "Assure", "Charm"]
    },
    "Disfluencies": {
        "description": "Interruption in the flow of speech.",
        "words": ["Uh", "Um", "You know", "Er", "Ah"]
    },
    "Self reference words": {
        "description": "Deceivers tend to use fewer self-referencing expressions.",
        "words": ["I", "My", "Mine"]
    },
    "Nominalised verbs": {
        "description": "Nouns derived from verbs. Nominalisations tend to hide the real action.",
        "words": ["Education", "Arrangement"]
    }
}

def get_features_2(txt):
    op = """Given below is a transcript enclosed in {} brackets. 
    The json below has category name as the key.
    For each category name you are given 
    (a) certain words that fall into this category ('words')
    (b) description of the category ('description')
    
    JSON : 
    """+str(linguistic_markers)+"""
    
    1) For each category provide a dictionary of words along with its frequency of occurance in the transcript
    2) calculate the overall sentiment of the transcript on a scale of -1 to +1 where -1 is extremely negative and +1 is extremely postive
    
    Give your response as json with keys as category name (as given in data) and 'sentiment'
    
    Transcript : {"""+txt+"""}
    
    Please Note it is very important that your response should be a valid json object directly readable using 
    json.loads()
    Example response 
    {
    'key1':'value1',
    'key2':'value2',
    'key3':'value3'
    }
    """
    r1 = get_gpt_res(op)
    print(r1)
    return r1

In [8]:
#!pip install pyaudio

In [9]:
import wave
import pyaudio
import time
import threading
import keyboard



In [10]:
def get_metric_obj(d, description, value, name):
    obj = {}
    obj['description'] = description
    obj['value'] = value
    obj['name'] = name
    d.append(obj)
   # return d

def get_features(y, sr):
    print("len of y trunc is "+str(len(y)))
    print("get features start")
    d = []

    zcr = np.mean(librosa.feature.zero_crossing_rate(y=y).T, axis=0)
    description = "Zero-Crossing Rate (ZCR): High ZCR indicates more frequent sign changes in the signal, capturing noisiness and abrupt changes in speech, which can signal stress or nervousness."
    get_metric_obj(d, description, zcr[0], 'zero_crossing_rate')

    rmse = np.mean(librosa.feature.rms(y=y).T, axis=0)
    description = "Root Mean Square Energy (RMSE): Measures the loudness of speech. Sudden changes or high energy can indicate emotional stress or intentional emphasis."
    get_metric_obj(d, description, rmse[0], 'root_mean_square_energy')

   # frame_length = 2048
   # hop_length = 512
   # energy = np.array([
   #     sum(abs(y[i:i+frame_length]**2))
   #     for i in range(0, len(y), hop_length)
   # ])
   # description = "Short-Time Energy: Highlights bursts of sound and pauses, indicating unnatural or rehearsed speech patterns."
   # get_metric_obj(d, description, np.mean(energy), 'short_time_energy')

    spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr).T, axis=0)
    description = "Spectral Centroid: Captures the center of mass of the spectrum, indicating the 'brightness' of the sound. Deviations from normal patterns can indicate emotional stress or emphasis."
    get_metric_obj(d, description, spectral_centroid[0], 'spectral_centroid')

    spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr).T, axis=0)
    description = "Spectral Bandwidth: Measures the range of frequencies. Wider bandwidth can indicate stressed speech or background noise."
    get_metric_obj(d, description, spectral_bandwidth[0], 'spectral_bandwidth')

    spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr).T, axis=0)
    description = "Spectral Roll-off: Helps to identify voiced vs. unvoiced sounds. Higher roll-off can indicate abrupt changes in speech patterns."
    get_metric_obj(d, description, spectral_rolloff[0], 'spectral_rolloff')

    pitches, magnitudes = librosa.core.piptrack(y=y, sr=sr)
    pitch = []
    for t in range(pitches.shape[1]):
        index = magnitudes[:, t].argmax()
        pitch.append(pitches[index, t])
    pitch_mean = np.mean([p for p in pitch if p > 0])
    description = "Pitch: Variations in pitch can indicate emotional stress or intentional modulation used in deception."
    get_metric_obj(d, description, pitch_mean, 'pitch')

 #   harmonic_to_noise_ratio = np.mean(librosa.effects.harmonic(y))
 #   description = "Harmonics-to-Noise Ratio (HNR): Measures the ratio of harmonic sounds to noise. Lower HNR indicates more noise and potential stress in the voice."
 #   get_metric_obj(d, description, harmonic_to_noise_ratio, 'harmonic_to_noise_ratio')

    intonation = np.std(pitches[pitches > 0])
    description = "Prosodic Features (e.g., intonation): Includes intonation, stress, and rhythm. Irregularities in these features can indicate emotional stress or rehearsed speech."
    get_metric_obj(d, description, intonation, 'intonation')

    duration = librosa.get_duration(y=y, sr=sr)
    words = len(librosa.effects.split(y, top_db=20))
    speaking_rate = words / duration
    description = "Speaking Rate: The rate of speech. Unnaturally fast or slow speaking rates can indicate nervousness or rehearsed speech."
    get_metric_obj(d, description, speaking_rate, 'speaking_rate')
    print("get features end")
  #  print("d -->",d)
    return d


In [11]:
global dg
dg = {}

In [12]:
import json

def string_to_json(sample_string):
    sample_string = sample_string.replace("json","")
    print("sample string is ",sample_string)
    jl = json.loads(sample_string)
    return jl

def sum_values(string):
    print("start of sum_values")
    data = string_to_json(string)
    reduced_dict = {}
    for key, sub_dict in data.items():
        if isinstance(sub_dict, dict):
            total_sum = sum(sub_dict.values())
            reduced_dict[key] = float(total_sum)
        else:
            reduced_dict[key] = float(sub_dict)

    print("end of sum_values")
    return reduced_dict

In [13]:

def get_time_series_view(ds,fl):
    print("inside get_time_series")
    global dg
    for d in ds:
        n = d['name']
        if n not in dg.keys():
            dg[n]=[d['value']]
        else:
            dg[n].append(d['value'])    

  #  print("dg is -->",dg)
    df = pd.DataFrame(dg)
    df = df.reset_index(inplace=False)
    print(df.head())
    plot_dataframe(df,fl)

In [14]:
import pandas as pd
import plotly.graph_objs as go
from plotly.subplots import make_subplots


def plot_dataframe(df,fl):
    print("plot_dataframes start")
    # Create the figure
    fig = go.Figure()
    
    # Add a trace for each column except 'index'
    for column in df.columns:
        if column != 'index':
            fig.add_trace(go.Scatter(x=df['index'], y=df[column], mode='lines', name=column))
    
    # Update layout
    fig.update_layout(
        title='Plot of DataFrame Columns',
        xaxis_title='Index',
        yaxis_title='Values',
        xaxis=dict(range=[0,fl+10]),
        template='plotly_white'
    )
    print("# Save the figure as an HTML file")
    fig.write_html("sample_plot.html")
    # Show the figure
    #fig.show()

In [15]:


# Global flag to stop the playback and thread
stop_flag = False
global ckp_count
ckp_count = 0


global fet2


def make_my_fet2(ip):
    pass
# Function to play the wav file and call the get_features function every ckp seconds
def play_wav_with_checkpoints(file_path, ckp):
    global stop_flag

    # Load the audio file
    y, sr = librosa.load(file_path, sr=None)

    # Open the .wav file
    wf = wave.open(file_path, 'rb')

    # Create a PyAudio instance
    p = pyaudio.PyAudio()

    # Open a stream to play the audio
    stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                    channels=wf.getnchannels(),
                    rate=wf.getframerate(),
                    output=True)

    # Read data in chunks
    chunk = 1024
    data = wf.readframes(chunk)

    # Define a function to handle checkpoints
    def checkpoint_handler():
        print("pt")
        while not stop_flag:
            time.sleep(ckp)
            global ckp_count
            ckp_count += 1
            print("ckp_count is ",ckp_count)
            if not stop_flag:
                # Get the current time in the audio
               # current_time = stream.get_time()
                # Extract the current portion of the audio
                print("y len is "+str(len(y)))
              #  print("current_time is "+str(current_time)+" sr is -->"+str(sr))

                fl  = len(y)/(ckp* sr)
                y_current = y[:int(ckp_count*ckp* sr)]
                features = get_features(y_current, sr)

                 #Example usage:
                new_file = extract_audio_segment("",audio_file_name, 0,ckp_count*ckp )
                print(f"New file saved as: {new_file}")

                res,txt = get_model_op(new_file)
                print(txt)
                fet2 = get_features_2(txt)
                fet2_pp = sum_values(fet2)
                print(fet2_pp)
                print(features)
                get_time_series_view(features,fl)
                print(features)

    # Start the checkpoint handler in a separate thread
    checkpoint_thread = threading.Thread(target=checkpoint_handler)
    checkpoint_thread.daemon = True
    checkpoint_thread.start()

    # Function to stop the playback and thread
    def stop_playback():
        global stop_flag
        stop_flag = True
        print("Stopping playback and checkpoint handler...")

    # Set up the key listener for the "Esc" key
    keyboard.add_hotkey('esc', stop_playback)

    # Play the audio
    while data and not stop_flag:
        stream.write(data)
        data = wf.readframes(chunk)

    # Close the stream and PyAudio
    stream.stop_stream()
    stream.close()
    p.terminate()
    print("Audio playback stopped.")


In [16]:
# Example usage
ckp = 20  # Duration in seconds
play_wav_with_checkpoints(audio_file_name, ckp)

pt
ckp_count is  1
y len is 11956224
len of y trunc is 882000
get features start
get features end
New file saved as: 0_20000_Phone call with HMRC fraudster in UK 2021.wav
start of model_op
end of model_op
 There is a criminal case that is listed under your name for tax fraud and tax evasion and there is also a warrant out for your arrest now. There's a warrant out for my arrest. This is a recordable line sir. I have to play this recording in the court also. We don't need any interruption in this court. Sorry this is going to get played in court. Yeah the recording is going to be in court.
ip for gpt res  Given below is a transcript enclosed in {} brackets. 
    The json below has category name as the key.
    For each category name you are given 
    (a) certain words that fall into this category ('words')
    (b) description of the category ('description')
    
    JSON : 
    {'Causation': {'description': 'Providing a certain level of concreteness to an explanation.', 'words': ['Beca

Exception in thread Thread-7 (checkpoint_handler):
Traceback (most recent call last):
  File "C:\Users\amrit\.conda\envs\personalEnv2\Lib\threading.py", line 1045, in _bootstrap_inner
    self.run()
  File "C:\Users\amrit\.conda\envs\personalEnv2\Lib\threading.py", line 982, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\amrit\AppData\Local\Temp\ipykernel_28576\3664538408.py", line 61, in checkpoint_handler
  File "C:\Users\amrit\AppData\Local\Temp\ipykernel_28576\816564092.py", line 11, in sum_values
  File "C:\Users\amrit\AppData\Local\Temp\ipykernel_28576\816564092.py", line 6, in string_to_json
  File "C:\Users\amrit\.conda\envs\personalEnv2\Lib\json\__init__.py", line 346, in loads
    return _default_decoder.decode(s)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\amrit\.conda\envs\personalEnv2\Lib\json\decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File 

end of gpt response
response got is ```json
{
    "Causation": {
        "Because": 0,
        "Effect": 0,
        "Hence": 0
    },
    "Negation": {
        "No": 0,
        "Not": 1,
        "Can’t": 0,
        "Didn’t": 0
    },
    "Hedging": {
        "May be": 0,
        "I guess": 0,
        "Sort of": 0
    },
    "Qualified assertions": {
        "Needed": 0,
        "Attempted": 0
    },
    "Temporal Lacunae": {
        "Later that day": 0,
        "Afterwards": 0
    },
    "Overzealous expression": {
        "I swear to God": 0,
        "Honestly": 0
    },
    "Memory loss": {
        "I forget": 0,
        "Can’t remember": 0
    },
    "Third person plural pronouns": {
        "They": 0,
        "Them": 0,
        "Theirs": 0
    },
    "Pronouns": {
        "I": 2,
        "Me": 0,
        "Mine": 0
    },
    "Negative emotion": {
        "Afraid": 0,
        "Sad": 0,
        "Hate": 0,
        "Abandon": 0,
        "Hurt": 0
    },
    "Negative sentiment": {
    

KeyboardInterrupt: 

In [19]:
t = """Given below is a transcript enclosed in {} brackets. 
    The json below has category name as the key.
    For each category name you are given 
    (a) certain words that fall into this category ('words')
    (b) description of the category ('description')
    
    JSON : 
    {'Causation': {'description': 'Providing a certain level of concreteness to an explanation.', 'words': ['Because', 'Effect', 'Hence']}, 'Negation': {'description': 'Avoiding to provide a direct response.', 'words': ['No', 'Not', 'Can’t', 'Didn’t']}, 'Hedging': {'description': 'Describes words which meaning implicitly involves fuzziness.', 'words': ['May be', 'I guess', 'Sort of']}, 'Qualified assertions': {'description': 'Unveils questionable actions.', 'words': ['Needed', 'Attempted']}, 'Temporal Lacunae': {'description': 'Unexplained lapses of time.', 'words': ['Later that day', 'Afterwards']}, 'Overzealous expression': {'description': 'Expresses some level of uncertainty.', 'words': ['I swear to God', 'Honestly']}, 'Memory loss': {'description': 'Feigning memory loss.', 'words': ['I forget', 'Can’t remember']}, 'Third person plural pronouns': {'description': 'Possessive determiners to refer to things or people other than the speaker.', 'words': ['They', 'Them', 'Theirs']}, 'Pronouns': {'description': 'Possessive determiners to refer to the speaker by overemphasising their physical presence.', 'words': ['I', 'Me', 'Mine']}, 'Negative emotion': {'description': 'Negative expressions in word choice.', 'words': ['Afraid', 'Sad', 'Hate', 'Abandon', 'Hurt']}, 'Negative sentiment': {'description': 'Negative emotional effect.', 'words': ['Abominable', 'Anger', 'Anxious', 'Bad']}, 'Positive emotion': {'description': 'Positive expressions in word choice.', 'words': ['Happy', 'Brave', 'Love', 'Nice', 'Sweet']}, 'Positive sentiment': {'description': 'Positive emotional effect.', 'words': ['Admire', 'Amazing', 'Assure', 'Charm']}, 'Disfluencies': {'description': 'Interruption in the flow of speech.', 'words': ['Uh', 'Um', 'You know', 'Er', 'Ah']}, 'Self reference words': {'description': 'Deceivers tend to use fewer self-referencing expressions.', 'words': ['I', 'My', 'Mine']}, 'Nominalised verbs': {'description': 'Nouns derived from verbs. Nominalisations tend to hide the real action.', 'words': ['Education', 'Arrangement']}}
    
    1) For each category provide a dictionary of words along with its frequency of occurance in the transcript
    2) calculate the overall sentiment of the transcript on a scale of -1 to +1 where -1 is extremely negative and +1 is extremely postive
    
    Give your response as json with keys as category name (as given in data) and 'sentiment'
    
    Transcript : { There is a criminal case that is listed under your name for tax fraud and tax evasion and there is also a warrant out for your arrest now. There's a warrant out for my arrest. This is a recordable line sir. I have to play this recording in the court also. We don't need any interruption in this court. Sorry this is going to get played in court. Yeah the recording is going to be in court.}
    
    Please Note it is very important that your response should be a valid json object directly readable using 
    json.loads()
    Example response 
    {
    'key1':'value1',
    'key2':'value2',
    'key3':'value3'
    }"""

fet2 = get_features_2(t)
fet2_pp = sum_values(fet2)
print("#################################")
print(fet2_pp)

ip for gpt res  Given below is a transcript enclosed in {} brackets. 
    The json below has category name as the key.
    For each category name you are given 
    (a) certain words that fall into this category ('words')
    (b) description of the category ('description')
    
    JSON : 
    {'Causation': {'description': 'Providing a certain level of concreteness to an explanation.', 'words': ['Because', 'Effect', 'Hence']}, 'Negation': {'description': 'Avoiding to provide a direct response.', 'words': ['No', 'Not', 'Can’t', 'Didn’t']}, 'Hedging': {'description': 'Describes words which meaning implicitly involves fuzziness.', 'words': ['May be', 'I guess', 'Sort of']}, 'Qualified assertions': {'description': 'Unveils questionable actions.', 'words': ['Needed', 'Attempted']}, 'Temporal Lacunae': {'description': 'Unexplained lapses of time.', 'words': ['Later that day', 'Afterwards']}, 'Overzealous expression': {'description': 'Expresses some level of uncertainty.', 'words': ['I swear 

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [1]:
mystr = """     [   {   1:'hello'   ,   'y':4,      "you"   :"have"}, {1:'hello'}]"""

In [2]:
mystr = mystr.strip()

In [29]:
mystr

'[   {   1:\'hello\'   ,   \'y\':4,      "you"   :"have"}, {1:\'hello\'}]'

In [None]:

if mystr[0]=="[" and mystr[-1]=="]":
    
else:
    return {}

In [None]:
mystr.split()

In [None]:


sample_string = '''
{
    "Causation": {
        "Because": 0,
        "Effect": 1,
        "Hence": 1
    },
    "Negation": {
        "No": 0,
        "Not": 3,
        "Can’t": 2,
        "Didn’t": 0
    },
    "Hedging": {
        "May be": 0,
        "I guess": 0,
        "Sort of": 0
    },
    "Qualified assertions": {
        "Needed": 0,
        "Attempted": 0
    },
    "Temporal Lacunae": {
        "Later that day": 0,
        "Afterwards": 0
    },
    "Overzealous expression": {
        "I swear to God": 0,
        "Honestly": 0
    },
    "Memory loss": {
        "I forget": 0,
        "Can’t remember": 0
    },
    "Third person plural pronouns": {
        "They": 0,
        "Them": 0,
        "Theirs": 0
    },
    "Pronouns": {
        "I": 0,
        "Me": 0,
        "Mine": 0
    },
    "Negative emotion": {
        "Afraid": 0,
        "Sad": 0,
        "Hate": 0,
        "Abandon": 0,
        "Hurt": 0
    },
    "Negative sentiment": {
        "Abominable": 0,
        "Anger": 0,
        "Anxious": 0,
        "Bad": 0
    },
    "Positive emotion": {
        "Happy": 0,
        "Brave": 0,
        "Love": 0,
        "Nice": 0,
        "Sweet": 0
    },
    "Positive sentiment": {
        "Admire": 0,
        "Amazing": 0,
        "Assure": 0,
        "Charm": 0
    },
    "Disfluencies": {
        "Uh": 0,
        "Um": 0,
        "You know": 0,
        "Er": 0,
        "Ah": 0
    },
    "Self reference words": {
        "I": 0,
        "My": 0,
        "Mine": 0
    },
    "Nominalised verbs": {
        "Education": 0,
        "Arrangement": 0
    },
    "sentiment": -0.8
}
'''

json_data = string_to_json(sample_string)
reduced_dict = sum_values(json_data)
print(reduced_dict)


Stopping playback and checkpoint handler...
Stopping playback and checkpoint handler...
Stopping playback and checkpoint handler...


In [None]:
from pytube import YouTube
from pydub import AudioSegment
import os

def download_youtube_audio(url):
    print(url)
    # Create a YouTube object
    yt = YouTube(url)
    
    # Extract the highest quality audio stream available
    audio_stream = yt.streams.filter(only_audio=True).first()
    
    # Get the video title and sanitize it for use as a filename
    video_title = yt.title
    safe_title = "".join([c if c.isalnum() or c in (' ', '.', '_') else '_' for c in video_title])
    output_path = f"{safe_title}.wav"
    print("h1")
    # Download the audio stream
    audio_file_path = audio_stream.download(filename="temp_audio")
    
    # Convert the downloaded audio to .wav format using pydub
    audio = AudioSegment.from_file(audio_file_path)
    audio.export(output_path, format="wav")
    print("h2")
    # Remove the temporary audio file
    os.remove(audio_file_path)

    print(f"Audio downloaded and saved as {output_path}")

# Example usage
vs = ['https://www.youtube.com/watch?v=q_qdC6grfIA', 'https://www.youtube.com/watch?v=n4iN8fras1Y']
for video_url in vs:
   pass
    # download_youtube_audio(video_url)

## Extract audio features

In [7]:
#!jupyter --config-dir

In [26]:
import time

start_time = None

def g():
    global start_time
    
    if start_time is None:
        # Record the start time
        start_time = time.time()
        #print("Timer started.")
    else:
        # Calculate the time interval
        end_time = time.time()
        interval = (end_time - start_time) / 60  # Convert to minutes
        print(f"Time interval: {interval:.2f} minutes")
        # Reset the start time
        start_time = None


Dummy function called!
Dummy function called!
Dummy function called!
Stopping playback and checkpoint handler...
Dummy function called!
Dummy function called!
Stopping playback and checkpoint handler...
Stopping playback and checkpoint handler...
Stopping playback and checkpoint handler...
Stopping playback and checkpoint handler...
Stopping playback and checkpoint handler...
Stopping playback and checkpoint handler...
Stopping playback and checkpoint handler...
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!


Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!
Dummy function called!


Zero-Crossing Rate (ZCR): High ZCR indicates more frequent sign changes in the signal, capturing noisiness and abrupt changes in speech, which can signal stress or nervousness.
Time interval: 0.01 minutes
Root Mean Square Energy (RMSE): Measures the loudness of speech. Sudden changes or high energy can indicate emotional stress or intentional emphasis.
Time interval: 0.02 minutes
Spectral Centroid: Captures the center of mass of the spectrum, indicating the 'brightness' of the sound. Deviations from normal patterns can indicate emotional stress or emphasis.
Time interval: 0.03 minutes
Spectral Bandwidth: Measures the range of frequencies. Wider bandwidth can indicate stressed speech or background noise.
Time interval: 0.04 minutes
Spectral Roll-off: Helps to identify voiced vs. unvoiced sounds. Higher roll-off can indicate abrupt changes in speech patterns.
Time interval: 0.03 minutes
Pitch: Variations in pitch can indicate emotional stress or intentional modulation used in deception.


{'zcr': 0.044475452999882245,
 'rmse': 0.22577345,
 'spectral_centroid': 1791.1599853658656,
 'spectral_bandwidth': 1993.0626842945642,
 'spectral_rolloff': 3088.9584122288893,
 'pitch': 737.019,
 'intonation': 1081.7798,
 'speaking_rate': 1.3315324303057554}

In [39]:
'''# 7. Mel-Frequency Cepstral Coefficients (MFCCs)
mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).T, axis=0)
print("Mel-Frequency Cepstral Coefficients (MFCCs): Capture the power spectrum of the speech signal and can highlight deviations in vocal tract characteristics, indicating stress or deception.")
print(mfccs)'''

'# 7. Mel-Frequency Cepstral Coefficients (MFCCs)\nmfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).T, axis=0)\nprint("Mel-Frequency Cepstral Coefficients (MFCCs): Capture the power spectrum of the speech signal and can highlight deviations in vocal tract characteristics, indicating stress or deception.")\nprint(mfccs)'

In [40]:
'''# 8. Chroma Feature
chroma_stft = np.mean(librosa.feature.chroma_stft(y=y, sr=sr).T, axis=0)
print("Chroma Feature: Represents harmonic content. Anomalous chroma patterns can indicate stress or unnatural speech modulation.")
print(chroma_stft)'''

'# 8. Chroma Feature\nchroma_stft = np.mean(librosa.feature.chroma_stft(y=y, sr=sr).T, axis=0)\nprint("Chroma Feature: Represents harmonic content. Anomalous chroma patterns can indicate stress or unnatural speech modulation.")\nprint(chroma_stft)'

In [41]:
'''# 10. Formant Frequencies (Approximated using LPC)
lpc_coeffs = librosa.lpc(y, order=2)
formants = np.roots(lpc_coeffs)
formants = [np.abs(f) for f in formants if np.imag(f) == 0]
formants = np.mean(formants) if formants else 0
print("Formant Frequencies: Formants are resonant frequencies of the vocal tract. Unusual formant patterns can indicate stress or intentional voice alteration.")
print(formants)'''

'# 10. Formant Frequencies (Approximated using LPC)\nlpc_coeffs = librosa.lpc(y, order=2)\nformants = np.roots(lpc_coeffs)\nformants = [np.abs(f) for f in formants if np.imag(f) == 0]\nformants = np.mean(formants) if formants else 0\nprint("Formant Frequencies: Formants are resonant frequencies of the vocal tract. Unusual formant patterns can indicate stress or intentional voice alteration.")\nprint(formants)'

In [42]:
''' # 12. Voice Activity Detection (VAD)
intervals = librosa.effects.split(y, top_db=20)
vad = len(intervals) / len(y)
print("Voice Activity Detection (VAD): Identifies segments of speech vs. silence. Unusual patterns can indicate scripted or unnatural speech.")
print(intervals[0:10])'''

' # 12. Voice Activity Detection (VAD)\nintervals = librosa.effects.split(y, top_db=20)\nvad = len(intervals) / len(y)\nprint("Voice Activity Detection (VAD): Identifies segments of speech vs. silence. Unusual patterns can indicate scripted or unnatural speech.")\nprint(intervals[0:10])'

In [None]:
# sentiment

Given below is a transcript enclosed in {} brackets. 
The json below has category name as the key.
For each category name you are given 
(a) certain words that fall into this category ('words')
(b) description of the category ('description')

JSON : 
{'Causation': {'description': 'Providing a certain level of concreteness to an explanation.', 'words': ['Because', 'Effect', 'Hence']}, 'Negation': {'description': 'Avoiding to provide a direct response.', 'words': ['No', 'Not', 'Can’t', 'Didn’t']}, 'Hedging': {'description': 'Describes words which meaning implicitly involves fuzziness.', 'words': ['May be', 'I guess', 'Sort of']}, 'Qualified assertions': {'description': 'Unveils questionable actions.', 'words': ['Needed', 'Attempted']}, 'Temporal Lacunae': {'description': 'Unexplained lapses of time.', 'words': ['Later that day', 'Afterwards']}, 'Overzealous expression': {'description': 'Expresses some level of uncertainty.', 'words': ['I swear to God', 'Honestly']}, 'Memory loss': {'desc

In [62]:
print(r1)

```json
{
  "Causation": {
    "words": {
      "Because": 1,
      "Effect": 0,
      "Hence": 0
    }
  },
  "Negation": {
    "words": {
      "No": 3,
      "Not": 4,
      "Can’t": 1,
      "Didn’t": 0
    }
  },
  "Hedging": {
    "words": {
      "May be": 0,
      "I guess": 0,
      "Sort of": 1
    }
  },
  "Qualified assertions": {
    "words": {
      "Needed": 0,
      "Attempted": 0
    }
  },
  "Temporal Lacunae": {
    "words": {
      "Later that day": 0,
      "Afterwards": 0
    }
  },
  "Overzealous expression": {
    "words": {
      "I swear to God": 0,
      "Honestly": 0
    }
  },
  "Memory loss": {
    "words": {
      "I forget": 0,
      "Can’t remember": 0
    }
  },
  "Third person plural pronouns": {
    "words": {
      "They": 3,
      "Them": 0,
      "Theirs": 0
    }
  },
  "Pronouns": {
    "words": {
      "I": 13,
      "Me": 2,
      "Mine": 0
    }
  },
  "Negative emotion": {
    "words": {
      "Afraid": 0,
      "Sad": 0,
      "Hate": 0,
  

In [41]:
linguistic_markers

{'Causation': {'description': 'Providing a certain level of concreteness to an explanation.',
  'words': ['Because', 'Effect', 'Hence']},
 'Negation': {'description': 'Avoiding to provide a direct response.',
  'words': ['No', 'Not', 'Can’t', 'Didn’t']},
 'Hedging': {'description': 'Describes words which meaning implicitly involves fuzziness.',
  'words': ['May be', 'I guess', 'Sort of']},
 'Qualified assertions': {'description': 'Unveils questionable actions.',
  'words': ['Needed', 'Attempted']},
 'Temporal Lacunae': {'description': 'Unexplained lapses of time.',
  'words': ['Later that day', 'Afterwards']},
 'Overzealous expression': {'description': 'Expresses some level of uncertainty.',
  'words': ['I swear to God', 'Honestly']},
 'Memory loss': {'description': 'Feigning memory loss.',
  'words': ['I forget', 'Can’t remember']},
 'Third person plural pronouns': {'description': 'Possessive determiners to refer to things or people other than the speaker.',
  'words': ['They', 'Them'

In [38]:
print("""As a customer care professional, I want to analyze this audio transcript to detect any
             anomalies or potential fraud. Please identify and highlight instances of the following markers,
             and provide a summary of the context in which they are used. Also please indicate if this is a potential
             fraud with a confidence score: Causation: Keywords indicating cause-and-effect relationships, providing a certain level of
             concreteness to an explanation. Examples: 'Because,' 'Effect,' 'Hence.' Negation: Words or phrases
             avoiding a direct response. Examples: 'No, ' 'Not,' 'Canâ€™t,' 'Didnâ€™t.' Hedging: Words that implicitly
             involve fuzziness. Examples: 'May be, ' 'I guess,' 'Sort of.' Qualified Assertions: Phrases that unveil
             questionable actions. Examples: 'Needed,' 'Attempted.' Temporal Lacunae: Unexplained lapses of time.
             Examples: 'Later that day, ' 'Afterwards.' Overzealous Expression: Phrases expressing some level of
             uncertainty. Examples: 'I swear to God,' 'Honestly.' Memory Loss: Phrases feigning memory loss.
             Examples: 'I forget,' 'Canâ€™t remember.' Third Person Plural Pronouns: Use of pronouns to refer to
             others. Examples: 'They,' 'Them,' 'Theirs.' Self-Reference Words: Pronouns referring to the speaker,
             often emphasizing their physical presence. Examples: 'I,' 'Me,' 'Mine.' Negative Emotion: Words with
             negative connotations. Examples: 'Afraid, ' 'Sad,' 'Hate,' 'Abandon,' 'Hurt.' Negative Sentiment: Words
             indicating negative emotional effects. Examples: 'Abominable,' 'Anger,' 'Anxious,' 'Bad.' Positive
             Emotion: Words with positive connotations. Examples: 'Happy,' 'Brave,' 'Love,' 'Nice,' 'Sweet.' Positive
             Sentiment: Words indicating positive emotional effects. Examples: 'Admire,' 'Amazing,' 'Assure,
             ' 'Charm.' Disfluencies: Interruptions in the flow of speech. Examples: 'Uh,' 'Um,' 'You know,' 'Er,
             ' 'Ah.' Nominalized Verbs: Nouns derived from verbs, often hiding the real action. Examples: 'Education,
             ' 'Arrangement.'""")

As a customer care professional, I want to analyze this audio transcript to detect any
             anomalies or potential fraud. Please identify and highlight instances of the following markers,
             and provide a summary of the context in which they are used. Also please indicate if this is a potential
             fraud with a confidence score: Causation: Keywords indicating cause-and-effect relationships, providing a certain level of
             concreteness to an explanation. Examples: 'Because,' 'Effect,' 'Hence.' Negation: Words or phrases
             avoiding a direct response. Examples: 'No, ' 'Not,' 'Canâ€™t,' 'Didnâ€™t.' Hedging: Words that implicitly
             involve fuzziness. Examples: 'May be, ' 'I guess,' 'Sort of.' Qualified Assertions: Phrases that unveil
             questionable actions. Examples: 'Needed,' 'Attempted.' Temporal Lacunae: Unexplained lapses of time.
             Examples: 'Later that day, ' 'Afterwards.' Overzealous Expression: Phrases 

True

In [46]:
# Print the transcription result
print(result["text"])

 There is a criminal case that is listed under your name for tax fraud and tax evasion and there is also a warrant out for your arrest now. There's a warrant out for my arrest. This is a recordable line sir. I have to play this recording in the court house so we don't need any interruption in this call. Sorry this is going to get played in court. Yeah the recording is going to be played in the court house. We found out a miscalculation of 1693 pounds outstanding under your name. So at this point of time we have only two options. The first option is to go to court and fight the case. In case if you're found guilty you have to pay a penalty fine of 19,000 pounds and a reasonable of two years and if you want to resolve the matter outside of the court house then you have to pay the outstanding amount which is 1693 pounds to the government. If it's fine it was not your intention to deport the HMRC then this whole money is going to be defunded back to you. Whether you want to do you want to 

Markers detected:

Causation: 'If,' 'Because'
Negation: 'No,' 'Not'
Hedging: 'Sort of'
Qualified Assertions: 'Need'
Temporal Lacunae: None
Overzealous Expression: None
Memory Loss: None
Third Person Plural Pronouns: 'They'
Self-Reference Words: 'I,' 'Me,' 'My'
Negative Emotion: 'Afraid'
Negative Sentiment: 'Criminal,' 'Fraud,' 'Evasion,' 'Arrest,' 'Guilty,' 'Penalty,' 'Warrant'
Positive Emotion: None
Positive Sentiment: None
Disfluencies: 'Uh,' 'Um'
Nominalized Verbs: 'Interruption,' 'Recording,' 'Court,' 'House'

Summary:
An unidentified individual purporting to be a representative of HMRC (Her Majesty's Revenue and Customs) informs the speaker that he is implicated in tax fraud and evasion, and that there is an arrest warrant. The individual presents the speaker with two options: go to court or pay the outstanding amount (1693 pounds) directly to the government. Initially flustered and confused, the speaker asks for more details, clarification, and evidence of the individual's identi

In [None]:
###
# giving an id to each voice
# extarct meaning from voice
# detect if this person is trying to 
# person is lying about details


In [5]:
with open("F:/office_work/voice_fraud/fraud_call.file", 'r') as file:
    lines = file.readlines()
    
for i, line in enumerate(lines):
    if len(line.split('\t')) != 2:
        print(f"Line {i+1}: {line}")


Line 456: normal	When're you guys getting back? G said you were thinking about not staying for mcr.normla	

Line 5928: 

Line 5929: 

Line 5930: 

Line 5931: 

Line 5932: 



In [8]:
fraud = pd.DataFrame(lines)

In [10]:
lines[0].split("\t")

['fraud',
 'hello, i m bank manager of SBI, ur debit card is about to expire would u want to issue new  card.\n']

In [36]:
def pp(r):
    t = r[0].split("\t")
    if len(t)!=2:
        return "none","none",0
    else:
        return t[0],t[1],len(t[1].split())

In [37]:
r = fraud.apply(pp,axis=1)

In [38]:
fraud_calls = pd.DataFrame(list(r))

In [39]:
fraud_calls.columns = ['label','text','length']

In [40]:
fraud_calls_fil = fraud_calls[~fraud_calls['length'].isin([0])]

In [41]:
fraud_calls_fil.shape[0]/fraud_calls.shape[0]

0.9989885367498315

In [45]:
pd.set_option('display.max_rows', 999)

In [48]:
pd.DataFrame(fraud_calls_fil['length'].value_counts()).reset_index().sort_values(by='length')

Unnamed: 0,length,count
33,1,40
32,2,41
26,3,94
7,4,211
3,5,382
0,6,451
1,7,433
2,8,420
4,9,317
5,10,280


In [None]:
sj = """  [{"key1":"value1"},  {"key2":"value2"},
{"key3":"value3"}]

"""

sj = sj.strip()
#assuming key does not have : in it
if sj[0]=="[" and sj[-1]=="]":

else:
   if sj[0]=="{" and sj[-1]=="}":
       sj = """{  'key1':'val:,','ue1', ''key2':"va',:lue2 " ,key3: "val..,,:':'""''ue3"}"""
       sj = """{}"""
       sj = sj[1:-2]
       
   else:
       print("Error")

In [None]:
give a list of keys
keys enclosed in "" or '' brackets
has 'key1'%:

In [23]:
import re
from typing import List


def create_regex_for_rest_key(key: str) -> str:
    # Escape the key to handle any special characters
    # Construct the regex pattern
    regex_pattern = r',\s*["\']' + str(key) + r'["\']\s*:'
    
    return regex_pattern

def create_regex_for_first_key(key: str) -> str:
    # Escape the key to handle any special characters
    # Construct the regex pattern
    regex_pattern = r'\s*["\']' + str(key) + r'["\']\s*:'
    
    return regex_pattern


def split_string_by_regex(input_string, keys_list,first_key=""):

    if first_key=="":
        first_key = keys_list[0]

    
    r1 = create_regex_for_first_key(first_key)

    rs = []

    for kl in keys_list:
        if kl !=first_key:
            r = create_regex_for_rest_key(first_key)
            rs.append(r)


    rs = [r1] + rs

    regex_list = rs
    print(regex_list)
    # Combine all regex patterns into one by joining them with '|'
    combined_regex = '|'.join(f'({regex})' for regex in regex_list)
    
    # Split the input string using the combined regex pattern
    result = re.split(combined_regex, input_string)
    
    # Filter out empty strings from the result
    result = [s for s in result if s]
    
    return result

In [24]:
#split_string_by_regex*

In [25]:
#(,)(any number of whitespaces)("or')key("or')(any number of whitespaces)(:)

In [50]:
sj = """{ 
'key1':"val:,','ue1",
"key2":"va',:lue2 " ,
"key3": "val..,,:':'""''ue3"}"""
import json, ast
ast.literal_eval(sj)
#sj.split("key1")

{'key1': "val:,','ue1", 'key2': "va',:lue2 ", 'key3': "val..,,:':'''ue3"}

In [44]:
re.split(r',\s*["\']' + str("key1") + r'["\']\s*:',sj)[0]

'{ \n\'key1\':\'val:,\',\'ue1\',\n\'key2\':"va\',:lue2 " ,\n\'key3\': "val..,,:\':\'""\'\'ue3"}'

In [34]:
ss = split_string_by_regex(sj,['key1','key2','key3'])

for s in ss:
    print(s)
    print("##########")

['\\s*["\\\']key1["\\\']\\s*:', ',\\s*["\\\']key1["\\\']\\s*:', ',\\s*["\\\']key1["\\\']\\s*:']
{
##########
 
'key1':
##########
'val:,','ue1',
'key2':"va',:lue2 " ,
'key3': "val..,,:':'""''ue3"}
##########


In [16]:
ss

['{  \'key1\':\'val:,\',\'ue1\', \'\'key2\':"va\',:lue2 " ,key3: "val..,,:\':\'""\'\'ue3"}']

In [None]:
def get_key_value()