In [1]:
pip install nltk




In [1]:
import nltk

In [2]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\OMEN\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:



import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.metrics.pairwise import cosine_similarity

import numpy as np

class SVMBot:
    def __init__(self, name):
        self.dataset = None
        self.model = None
        self.vectorizer = None
        self.name = name

    def read_dataset(self, dataset_link):
        try:
            self.dataset = pd.read_csv(dataset_link)
        except:
            raise ValueError("Invalid dataset link. Please try again.")

    def get_column_names(self):
        if self.dataset is None:
            raise ValueError("Dataset not loaded. Please load dataset first.")
        return self.dataset.columns.tolist()

    def train_model(self, X_column, Y_column):
        if self.dataset is None:
            raise ValueError("Dataset not loaded. Please load dataset first.")

        if X_column not in self.dataset.columns.tolist() or Y_column not in self.dataset.columns.tolist():
            raise ValueError("X or Y column not found in the dataset. Please provide valid column names.")

        X = self.dataset[X_column].values.reshape(-1, 1)
        Y = self.dataset[Y_column].values

        # Perform feature scaling on X values
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)

       # Train the SVM model
        self.model = SVR(kernel='rbf')
        self.model.fit(X_scaled, Y)

    def predict(self, X_value):
        if self.model is None:
            raise ValueError("Model not trained. Please train model first.")

        # Reshape X_value into a 2D array
        X_value = np.array(X_value).reshape(1, -1)

        Y_pred = self.model.predict(X_value)

        return Y_pred[0]

    def process_text(self, text):
        # Tokenize text
        tokens = word_tokenize(text)

        # Remove stop words
        stop_words = set(stopwords.words("english"))
        tokens = [token for token in tokens if token.lower() not in stop_words]

        # Perform stemming
        stemmer = PorterStemmer()
        tokens = [stemmer.stem(token) for token in tokens]

        return tokens

    def chat(self):
        print( "Welcome to {self.name}!")
        print("You can ask me questions about your dataset and I will do my best to assist you.")
        print("Enter 'quit' to exit the chat.")

        while True:
            user_input = input("User: ")
            user_input = user_input.lower()

            if user_input == 'quit':
                print("Chatbot: Goodbye!")
                break

            # Process user input
            tokens = self.process_text(user_input)

            # Check for keywords and respond accordingly

            if 'hello' in tokens or 'hi' in tokens:
                print("Chatbot: Hi there!")
            elif 'how are you' in user_input:
                print("Chatbot: I'm doing well, thank you!")
            elif 'thank you' in tokens or 'thanks' in tokens:
                print("Chatbot: You're welcome!")
            elif 'bye' in tokens or 'goodbye' in tokens:
                print("Chatbot: Goodbye!") 
                break

            elif 'load' in tokens and 'dataset' in tokens:
                while True:
                    try:
                        file_path = input("Chatbot: Please provide the file path of the dataset on your computer: ")
                        self.read_dataset(file_path)
                        print("Chatbot: Dataset loaded successfully!")
                        break
                    except ValueError as e:
                        print("Chatbot: Failed to load dataset. Please try again.")
          
            elif 'train' in tokens and 'model' in tokens:
                while True:
                    try:
                        X_column = input("Chatbot: Please enter the name of the X column for training the model: ")
                        Y_column = input("Chatbot: Please enter the name of the Y column for training the model: ")
                        self.train_model(X_column, Y_column)
                        print("Chatbot: Model trained successfully!")
                        break
                    except ValueError as e:
                        print("Chatbot: Failed to train model. " + str(e))

            elif 'get' in tokens or 'column names' in tokens:
                print(self.dataset.columns)


            elif 'predict' in tokens:
                if self.model is None:
                    print("Chatbot: Model not trained. Please train model first.")
                else:
                    try:
                        X_value = float(input("Chatbot: Please enter the value of X for prediction: "))
                        Y_pred = self.predict(X_value)
                        print("Chatbot: The predicted value of Y is: " + str(Y_pred))
                    except ValueError:
                        print("Chatbot: Invalid input. Please enter a numerical value for X.")

            elif 'process' in tokens and 'text' in tokens:
                text = input("Chatbot: Please enter the text for processing: ")
                processed_text = self.process_text(text)
                print("Chatbot: The processed text is: ")
                print(processed_text)

            else:
                print("Chatbot: I'm sorry, I don't understand. Can you please rephrase your question?")
bot = SVMBot("Support Vector Machine Bot")
bot.chat()

Welcome to {self.name}!
You can ask me questions about your dataset and I will do my best to assist you.
Enter 'quit' to exit the chat.
User: hi
Chatbot: Hi there!
User: load dataset
Chatbot: Please provide the file path of the dataset on your computer: C:\Users\OMEN\anaconda3\diabetes.csv
Chatbot: Dataset loaded successfully!
