In [1]:
import pandas as pd


In [3]:
df = pd.read_csv('Book(Sheet1).csv')

In [4]:
print(df.head())

     Company  Year Total Revenue Net Income Total Assets Total Liabilities  \
0  Microsoft  2023      2,11,915     72,361     4,11,976          2,05,753   
1  Microsoft  2022      1,98,270     72,738     3,64,840          1,98,298   
2  Microsoft  2021      1,68,088     61,271     3,04,180          1,91,791   
3      Tesla  2023        96,773     12,600     1,06,618            43,009   
4      Tesla  2022        81,462     11,000       82,338            36,440   

   Cash Flow from Operating Activities  
0                                   14  
1                                    6  
2                                   19  
3                                   10  
4                                    7  


In [5]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize



In [6]:
# Download NLTK data if not already downloaded
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /Users/apple/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /Users/apple/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [7]:
# Define stop words for filtering
stop_words = set(stopwords.words('english'))

In [8]:
def preprocess_query(query):
    # Tokenize and filter stopwords
    tokens = word_tokenize(query.lower())
    filtered_tokens = [token for token in tokens if token.isalnum() and token not in stop_words]
    return filtered_tokens

def match_query_to_intent(tokens):
    # Define predefined intents (queries)
    intents = {
        'total revenue': 'Total Revenue',
        'net income': 'Net Income',
        'total assets': 'Total Assets',
        'total liabilities': 'Total Liabilities',
        'cash flow': 'Cash Flow from Operating Activities'
    }
    
    # Match tokens to intents
    for token in tokens:
        for intent_key, intent_value in intents.items():
            if token in intent_key:
                return intent_value
    
    return None

def financial_chatbot(query):
    try:
        # Preprocess the user query
        tokens = preprocess_query(query)
        
        # Match tokens to predefined intents
        intent = match_query_to_intent(tokens)
        
        if not intent:
            return "Sorry, I can only provide information on predefined financial queries."
        
        # Retrieve latest available data for the selected intent
        latest_data = df.iloc[0]  # Assuming we always take the latest year's data
        
        # Construct response based on intent
        response = f"Latest financial data:\n"
        response += f"- {intent}: {latest_data[intent]}"
        
        return response
    
    except Exception as e:
        return f"Error: {str(e)}"

In [9]:
# Test the chatbot with natural language queries
print(financial_chatbot("What is the total revenue?"))
print(financial_chatbot("Show me the net income."))
print(financial_chatbot("What are the total assets?"))
print(financial_chatbot("Cash flow details."))
print(financial_chatbot("What is the profit?"))  

Latest financial data:
- Total Revenue: 2,11,915
Latest financial data:
- Net Income: 72,361
Latest financial data:
- Total Revenue: 2,11,915
Latest financial data:
- Cash Flow from Operating Activities: 14
Sorry, I can only provide information on predefined financial queries.
