<h1>Creating the Model for the Negotiation Engine</h1>

In [1]:
#importing necessary libraries
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from googletrans import Translator
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import string

Step 1: Cleaning and Preprocessing the Data

In [2]:
# Load the negotiation dataset (the path is relative to the notebook's working directory)
file_path = 'Datasets_with_engine/large_negotiation_dataset_with_engine.csv'
# Read the CSV and drop rows with missing values in one step, so re-running the cell
# always rebuilds `data` from the file
data = pd.read_csv(file_path).dropna()
# DataFrame.info() prints its own report and returns None; wrapping it in print()
# would emit an extra "None" line, so it is called directly
data.info()

FileNotFoundError: [Errno 2] No such file or directory: 'Datasets_with_engine/large_negotiation_dataset_with_engine.csv'

In [None]:
# Encoding categorical variables.
# Each column gets its OWN LabelEncoder: refitting one shared encoder overwrites its
# `classes_`, which would make the Buyer/Seller/Product codes impossible to decode later.
categorical_cols = ['Buyer', 'Seller', 'Product', 'Engine_Intervention']
label_encoders = {}
for col in categorical_cols:
    label_encoders[col] = LabelEncoder()
    data[col] = label_encoders[col].fit_transform(data[col])
# Backward compatibility: `label_encoder` still refers to the encoder fitted on the
# target column, exactly as it did after the original sequence of fit_transform calls.
label_encoder = label_encoders['Engine_Intervention']
print(data.head())  # Output first few rows after encoding categorical variables

   Buyer  Seller  Product                                  Stipulation_Buyer  \
0  68020   26820       57  Kristen Hopkins requires additional specificat...   
1  46386   27267       23  Jackie Olson requires additional specification...   
2   6920  114338        1  Angela Chung requires additional specification...   
3  44408   17566       42  Heather Anderson requires additional specifica...   
4  56446  122521       15  John Shields requires additional specification...   

                                  Stipulation_Seller  \
0  Day Ltd can consider optional upgrades for the...   
1  Delgado PLC can consider optional upgrades for...   
2  Trujillo, Hernandez and Mckee can consider opt...   
3  Case PLC can consider optional upgrades for th...   
4  Wilkinson-Ho can consider optional upgrades fo...   

                                          Resolution Negotiation_Date  \
0  A coupon code will be provided for future purc...       2023-07-02   
1  A coupon code will be provided fo

In [None]:
# Replace the raw negotiation date with separate numeric year / month / day features.
# pop() removes the original column from `data` and hands it back for parsing.
negotiation_dates = pd.to_datetime(data.pop('Negotiation_Date'))
data['Negotiation_Year'] = negotiation_dates.dt.year
data['Negotiation_Month'] = negotiation_dates.dt.month
data['Negotiation_Day'] = negotiation_dates.dt.day
print(data.head())  # Output first few rows after handling date-time features

   Buyer  Seller  Product                                  Stipulation_Buyer  \
0  68020   26820       57  Kristen Hopkins requires additional specificat...   
1  46386   27267       23  Jackie Olson requires additional specification...   
2   6920  114338        1  Angela Chung requires additional specification...   
3  44408   17566       42  Heather Anderson requires additional specifica...   
4  56446  122521       15  John Shields requires additional specification...   

                                  Stipulation_Seller  \
0  Day Ltd can consider optional upgrades for the...   
1  Delgado PLC can consider optional upgrades for...   
2  Trujillo, Hernandez and Mckee can consider opt...   
3  Case PLC can consider optional upgrades for th...   
4  Wilkinson-Ho can consider optional upgrades fo...   

                                          Resolution  Engine_Intervention  \
0  A coupon code will be provided for future purc...                    2   
1  A coupon code will be pro

In [None]:
# Standardise the date-derived numeric columns with StandardScaler.
# NOTE(review): the scaler is fitted on the full dataset before the train/test split
# performed further down, so test-set statistics influence the scaling - confirm intended.
numerical_cols = ['Negotiation_Year', 'Negotiation_Month', 'Negotiation_Day']  # Update with actual numerical columns
scaler = StandardScaler().fit(data[numerical_cols])
data[numerical_cols] = scaler.transform(data[numerical_cols])
print(data.head())  # Output first few rows after scaling numerical features

   Buyer  Seller  Product                                  Stipulation_Buyer  \
0  68020   26820       57  Kristen Hopkins requires additional specificat...   
1  46386   27267       23  Jackie Olson requires additional specification...   
2   6920  114338        1  Angela Chung requires additional specification...   
3  44408   17566       42  Heather Anderson requires additional specifica...   
4  56446  122521       15  John Shields requires additional specification...   

                                  Stipulation_Seller  \
0  Day Ltd can consider optional upgrades for the...   
1  Delgado PLC can consider optional upgrades for...   
2  Trujillo, Hernandez and Mckee can consider opt...   
3  Case PLC can consider optional upgrades for th...   
4  Wilkinson-Ho can consider optional upgrades fo...   

                                          Resolution  Engine_Intervention  \
0  A coupon code will be provided for future purc...                    2   
1  A coupon code will be pro

In [None]:
# Separate the target (Engine_Intervention) from the features, then hold out 20% for testing.
y = data['Engine_Intervention']
X = data.drop(columns='Engine_Intervention')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,  # fixed seed so the split is reproducible
)
# Show the shapes of the four resulting pieces on one line
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))


(160000, 9) (40000, 9) (160000,) (40000,)


Performing natural language processing (NLP) to convert the text into a form that can be fed to the model

In [None]:
# Download NLTK resources (run once; packages that are already installed are not fetched again)
nltk.download('punkt')
# NLTK 3.9 and later make word_tokenize load the 'punkt_tab' tables instead of the
# pickled 'punkt' model; without this download the tokenizer raises LookupError there.
nltk.download('punkt_tab')
nltk.download('wordnet')
nltk.download('stopwords')

# Lazily-built NLTK helpers shared by every text_preprocessing() call.
_TEXT_PREPROCESSING_RESOURCES = {}


def _get_text_preprocessing_resources():
    """Return the cached ``(stop_words, lemmatizer)`` pair, creating it on first use."""
    if not _TEXT_PREPROCESSING_RESOURCES:
        # Built once instead of once per row: the function is applied to every row of
        # two text columns, and reloading the stopword list each time is wasted work.
        _TEXT_PREPROCESSING_RESOURCES['stop_words'] = frozenset(stopwords.words('english'))
        _TEXT_PREPROCESSING_RESOURCES['lemmatizer'] = WordNetLemmatizer()
    return (
        _TEXT_PREPROCESSING_RESOURCES['stop_words'],
        _TEXT_PREPROCESSING_RESOURCES['lemmatizer'],
    )


# Function for text preprocessing
def text_preprocessing(text):
    """Normalise one piece of free text for vectorisation.

    Steps, in order: lowercase, tokenize with ``word_tokenize``, keep only purely
    alphabetic tokens (drops punctuation and numbers), remove English stopwords,
    lemmatize each remaining token, and join the tokens with single spaces.

    Parameters
    ----------
    text : str
        Raw text. Any non-string value (e.g. ``None`` or ``NaN``) is treated as
        missing text.

    Returns
    -------
    str
        The space-joined processed tokens; ``''`` for missing or non-string input.
    """
    # Guard against missing values so a stray NaN does not abort a whole .apply() run
    if not isinstance(text, str):
        return ''

    stop_words, lemmatizer = _get_text_preprocessing_resources()

    # Lowercase first so the stopword comparison below is case-insensitive
    tokens = word_tokenize(text.lower())

    # Filter (alphabetic, non-stopword) before lemmatizing, matching the original order
    kept_tokens = [tok for tok in tokens if tok.isalpha() and tok not in stop_words]
    return ' '.join(lemmatizer.lemmatize(tok) for tok in kept_tokens)

# Add a cleaned "<column>_Processed" companion for each free-text stipulation column.
# NOTE(review): X_train / X_test were split off from `data` in an earlier cell, so they
# do not contain these new columns - confirm how the processed text reaches the model.
for text_col in ('Stipulation_Buyer', 'Stipulation_Seller'):
    data[f'{text_col}_Processed'] = data[text_col].apply(text_preprocessing)
print(data)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.
