In [10]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [11]:
# Getting the dataset 
data = pd.read_csv('../datasets/Task 2/IMDb-Movies-India.csv', encoding='ISO-8859-1')


In [12]:

# Ensuring 'Duration' is a string, and then removing ' min' from the values
data['Duration'] = data['Duration'].astype(str)  # Converting 'Duration' to string type
data['Duration'] = data['Duration'].str.replace(' min', '', regex=False)
data['Duration'] = pd.to_numeric(data['Duration'], errors='coerce')
# Cleaning the 'Votes' column by converting it to numeric values
data['Votes'] = pd.to_numeric(data['Votes'], errors='coerce')  

In [13]:
#Handling missing values
# Filling missing values for categorical features (e.g., 'Genre', 'Director', 'Actor 1', 'Actor 2', 'Actor 3')
data.fillna({
    'Director': 'Unknown', 
    'Actor 1': 'Unknown', 
    'Actor 2': 'Unknown', 
    'Actor 3': 'Unknown',
    'Duration': data['Duration'].median(),  # Filling missing 'Duration' with median value
    'Votes': data['Votes'].median()         # Filling missing 'Votes' with median value
}, inplace=True)

In [14]:
# Extracting features and target
X = data[[ 'Director', 'Actor 1', 'Actor 2', 'Actor 3', 'Duration', 'Votes']]
y = data['Rating']  # Target variable: Rating

In [15]:

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), [ 'Director', 'Actor 1', 'Actor 2', 'Actor 3']),  # One hot encoding
        ('num', StandardScaler(), ['Duration', 'Votes'])  # Scaling numerical features
    ])

In [16]:
# Spliting the data into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [17]:
X_train= X_train[y_train.notna()]
y_train= y_train.dropna()

In [18]:
# Step 6: Building a regression pipeline with a Random Forest model
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))  # RandomForestRegressor for predicting rating
])

In [19]:

# Training the model
model.fit(X_train, y_train)

In [20]:
# Making predictions
y_pred = model.predict(X_test)

ValueError: Found unknown categories ['Satyajit Halder', 'Sartaj Singh Pannu', 'Yogesh Saxena', 'Patience Cooper', 'Dinesh Vijan', 'Anirudh Baboo', 'Rackhee Sandilya', 'Nakul Singh Sawhney', 'Debu Sen', 'Surinder Suri', 'Pankaj Kr Virat', 'Munindra', 'Sivaranjini', 'K.K. Talwar', 'Chander Saigal', 'Ajit Kumar', 'Padmapriya', 'Manish Garg', 'Narmada Shankar', 'Sethi. G.R.', 'Willard Carroll', 'Mumtaz', 'Rohit Aarrya', 'Saumyy Shivhare', 'Jayashree Kanal', 'Namrata Rao', 'Nikhil Bose', 'Pannalal Ghosh', 'Barkat Mehra', 'Rajesh Babbar', 'Behzad Khambata', 'Sanjeev Rattan', 'Kamaal Narvi', 'Vandana Sharma', 'Gulshan Ashte', 'Suraj Bharti', 'Samit Bhanja', 'W.Z. Ahmed', 'Iqbal Raj', 'N.K. Ziree', 'Anil Srikantam', 'Tamás Tóth', 'Shakuntala Paranjpye', 'Raja', 'Milinnd Sagar', 'Puja Bedi', 'Vishwamitter Adil', 'Arindam Chaudhuri', 'Arghya Basu', 'Zul Vellani', 'Lakshmi Musari', 'H.B. Singh', 'K. Talpade', 'Milind Swamy', 'Trinayan Sharma', 'Paritosh Painter', 'Mahendra Sandhu', 'Girija', 'Prashant Sehgal', 'Shuja Ali', 'Manish', 'Kamal Mehta', 'Sheetal', 'M.K. Burman', 'Shyama', 'Ramu Kariat', 'Hussein Khan', 'Divakar Ghodake', 'Krishna Kapil', 'A. Shakoor', 'R.C. Sakthi', 'Kushal Ruia', 'Vivek Rai', 'Pardeep Raja', 'Asokan', 'Habib', 'Dhiraj Mishra', 'Majnu', 'K.P. Sasi', 'Soumendra Padhi', 'Bela Bose', 'Kesar Vasistha', 'Biren Chatterjee', 'Bharat Arora', 'Kirti Reddy', 'Ravi Davala', 'Suhail Azmi', 'Richa Pallod', 'Rajendra Shivv', 'Nitin Chandra', 'Farukh Khan', 'Kewal Krishna', 'Mikki Koomar', 'Rakesh Sharma', 'Bat for Lashes', 'B.S. Ghad', 'Pragyesh Singh', 'C.S. Kumar', 'Leena Malhotra', 'Pryas Gupta', 'Mashhoor Amrohi', 'Sarath Kumar', 'V. Ravichandran', 'Latif Faiziyev', 'Venkatesh Kumar', 'D. Raman', 'Aarun Nagar', 'Jimmy Jaiswal', 'Madhu Shrikaar', 'Saheem Khan', 'Keki Adajania', 'Ajay Bannerjee', 'Sourabh Bali', 'Rajesh Ramdev Ram', 'Randeep Jha', 'N.R. Acharya', 'Rajeshwar Pandey Raaj', 'Hassan Nazer', 'Naqi Ahmed', 'H.K. Shivdasani', 'Riingo Banerjee', 'Vivek Anand', 'Harbhajan Virdi', 'Devinder Kanwar', 'Rukun', 'Bhanupriya', 'V.N. Aditya', 'Dheeraj Kumar', 'Anil Senior', 'Aditya Dhar', 'Saif Hyder Hasan', 'Viktoria Rampal Dzurenko', 'Sabir', 'Suryaprakash Lakkoju', 'Shubhanshu Satyadeo', 'Shabnam Virmani', 'Mukesh Duggal', 'Vaishnavi Sundar', 'Abdul Rehman Kabuli', 'V. Krishna Kumar', 'Mohan Gandhi', 'Arvind Bhatt', 'Akhil Gautam', 'Deva Dutta', 'Ganesh Loke', 'Attaullah Khan', 'K. Gopal Krishnan', 'Mannan A. Gada', 'Sudhir Sen', 'Devkanya Thakur', 'Vijay Kumar Mishra', 'Sunit Sinha', 'Alaknanda', 'Abhishek Bindal', 'Raghubeer Singh', 'Ram Kamal Mukherjee', 'Krishna Agazzi', 'Ravi Adhikari', 'Prabhleen Sandhu', 'Sagar Joshi', 'Nirupa Roy', 'Sajan', 'Shankar Nag', 'Ajay Singh', 'Goswami Anurag', 'Shivraj Goyat', 'Raja Thakur', 'Jai Prakash', 'Arvind Sen', 'Jagdish Waghela', 'Srividya', 'Sarvnik Kaur', 'Shyamal K. Mishra', 'Narayan Chauhan', 'Harishankar Tamminana', 'Kailash Surendranath', 'Baldev R. Jhingan', 'Vivek', 'Sanm', 'Gul Hamid', 'Kali Prasad Ghosh', 'David Abraham', 'Nandlal Nayak', 'Keshhav Panneriy', 'Kanwar Jagdish', 'Inayat', 'Aslam Khan', 'Rana Jung Bahadur', 'Kumud Pant', 'Meghani', 'Ashish Balram Nagpal', 'Kamlakar', 'Manoj J. Bhatia', 'K.L. Kahan', 'Anant Joshi', 'Natwar Shyam', 'Kamran', 'Chitra', 'Shams Tanweer', 'Aslam Basha Shaik', 'Shorya Veer Saagar', 'Baburao Painter', 'Mobeen Warsi', 'Walli', 'Leela Chitnis', 'Chella', 'Asha Dutta', 'Ashwin Neal Mani', 'Rahul Singh', 'B. Gupta', 'Reema Rakesh Nath', 'Braj Bhushan', 'N. Paryani', 'Naseem Siddiki', 'Noor Mohammad Charlie', 'Sushil Malik', 'T. Madhava Rao', 'Piyush Pande', 'Shankar Mehta', 'Deepak Baldev', 'Tara', 'Ramnath Roy', 'Mahesh Kodiyal', 'Rajendra Sharma', 'Saira Banu', 'Joshua Z Weinstein', 'Mickal', 'Anjali Devi', 'Sneha', 'Jyotish Bannerjee', 'Aadish Keluskar', 'N. Shankaran Nair', 'V.K. Sharma', 'Lubdhak Chatterjee', 'Jalal Agha', 'Hasmukh Rajput', 'Dinesh Saxena', 'Jal Merchant', 'Dinesh Lakhanpal', 'Ilyas Kashmiri', 'Utpal Chaudhary', 'Fram Sethna', 'Madhav Mehta', 'Uday Kiran', 'Prem Chopra', 'Jaswinder Chahal', 'Jwalamukhi', 'Mukesh Kumar', 'Aanuj Tewari', 'Neeta Jain-Duhaut', 'Mahesh Babu', 'Pandya Manoj', 'Satish Ranadive', 'Prosit Roy', 'K.R. Rangan', 'G.K. Devare', 'Abhijit Mazumdar', 'Kewal Kumar', 'Ajit Lahiri', 'Romi Bhai', 'Yogesh Mishra', 'Laxmi', 'P K Radhakrishnan', 'Vicky Bhardwaj', 'Snehal Garg', 'Abhishek Utkarsh Deokar', 'Jagapathi Babu', 'Vishakh Punna', 'Jagan Shakti', 'Jignesh V. Vaishnav', 'Gowtam Tinnanuri', 'Susheel Gajwani', 'Kumar Pradeep', 'Vijaya Choudhury', 'Naresh Ambasta', 'Antara Mali', 'Satidnya Babbar', 'Ali Athar', 'S. Usman', 'Vellaaichamy', 'Bhagwan Thakur', 'Rohit Dwivedi', 'K.C. Agarwal', 'Nandan Kudhyadi', 'Faizan Kareem', 'Ranga Nath', 'Lv Siva', 'Dhavala Satyam', 'Rajiv Dhingra', 'Vijaya Krishnan', 'Saumya Sharma', 'Kallol Sen', 'Dhumal', 'Gyan Sahay', 'Usha', 'Ranjibhai Arya', 'Chidananda Das Gupta', 'Chandni', 'Tiffanie Hsu', 'Ramya Krishnan', 'Suraiya', 'Ugresh Prasad Ujala', 'Bhanumati', 'Feroze Dastur', 'Raja Chanda', 'Sudhir R. Nair', 'Kalaignanam', 'Tanuja Shankar Khan', 'Azaad Irani', 'Varalakshmi G.', 'Ravi Kumar', 'Nag Ashwin', 'Jaidev Chakraborty', 'Mohnish Bahl', 'Nabendu Ghosh', 'Sunny Kapoor', 'Hemant Verma', 'Hemant Prabhu', 'Lalita Pawar', 'Yatin Karyekar', 'Shiva Dagar', 'Ulrike Mothes', 'Rafiq Rizvi', 'Devanshu Singh', 'Pratap Chandra', 'Pawan k Shrivastava', 'Dave Jigar', 'Nitin Manmohan', 'Tamilvannan', 'Anil Nagrath', 'Amit Vats', 'Swaroop Sawant', 'Francesca Archibugi', 'Aruna Irani', 'Nikunj Rathod', 'Prince Khan', 'Husn Banu', 'Srinath Rajendran', 'Arpan Sarkar', 'Shamas Nawab Siddiqui', 'Sandeep Bhatt', 'Harbans Singh', 'Reema Mukharjee', 'Arvind Tripathi', 'Bharath Bhushan Nethi', 'Jainendra Jain', 'Babloo Seshadri', 'Chiranjeet Dhawan', 'Sonali Joshi', 'Tilakdhari Tripathi', 'Firoz Ali', 'Nayana Khedkar', 'Hiren Bose', 'Bhudo Advani', 'Kapil Kaustubh Sharma', 'Ajit Singh Deol', 'Sunita Malpani', 'Harish Kumar', 'Ritesh s Kumar', 'Ashish Nehra', 'B.L. Chopra', 'Mithun Chakraborty', 'Raaj Rahhi', 'Ranjith Oraon', 'Samudra V.', 'R. Sharma', 'B.S. Rajhans', 'Nitin Supekar', 'Nagarjuna Akkineni', 'Sharad Sharan', 'Sudarshan Babbar', 'K. Sardar', 'H.A. Rahi', 'M. Sabharwal', 'Sudhakar Sharma', 'Chimankant Gandhi', 'Bobby Singh', 'R.S. Tara', 'Nelson', 'Vikas Anand', 'Y.V.S. Chowdary', 'P. Anil', 'Junior Dharmendra', 'Mahesh Kapoor', 'Akashaditya Lama', 'Zameer Kamble', 'Iqbal Khan', 'Mahesh', 'Rishikesh Pandey', 'Radhika Chaudhari', 'Jayalalitha', 'Khatun', 'Deepak Rao', 'Chandu', 'W.M. Khan', 'Maruti', 'K.D. Singh', 'Shagufta Ali', 'Rituparna DasDatta', 'A.K. Bir', 'Ambika', 'Sanjay F. Gupta', 'Pratyush Upadhyay', 'Goutam Pawar', 'Ss Vasan', 'Bibi', 'Navjot Gulati', 'Kishore Khanna', 'Swaroop Kanchi', 'Qasiar Sabhai', 'Darshan Laad', 'Teerat Singh', 'Mithu Singh', 'Rahul Deo Verma', 'N.G. Devare', 'Aarif Sheikh', 'Shravan Kumar', 'Madanrai Vakil', 'Ashoke Pandit', 'Nabyendu Chatterjee', 'Daksh Pandya', 'Pushraj', 'Vinnil Markan', 'Tarun Mansukhani', 'Yogesh Mehra', 'Rakesh Kashyap', 'Meena', 'Jakee Patel', 'Satish Kaul', 'Mohammed Israr Ansari', 'Niranjan', 'Leon Rodriguez', 'Shiraz Henry', 'Venkatesh Bk', 'Manish Goel', 'C.R. Bajaj', 'A B Arjun', 'Swadesh', 'Vickram', 'Monti Issar', 'Kukku Batra', 'Kumar Bhatia', 'Usha Ganesarajah', 'Somesh Agarwal', 'Rajesh', 'Pep Figueiredo', 'Shakir Khan', 'Ramchandra Gopal Torney', 'Gulshan Singh', 'Sanjay Jha', 'Kulbhushan Kharbanda', 'Manivasagam', 'K.A. Narayan', 'Ashok Patil', 'Araya', 'Siddharth Nagar', 'R.D. Mathur', 'Sunny Kumar', 'Fenil Seta', 'Syed-Hasan', 'Jayoo', 'Jal', 'P K Krishnan', 'Nina Shivdasani', 'K. Bhushan', 'Mandhar Shetty', 'Gurdip Singh', 'Atanu Mukherjee', 'Prithvi Konanur', 'Ruslaan Mumtaz', 'Jaimin Bal', 'Abhi Bhattacharya', 'Deepankar Prakash', 'Adarsh Eshwarappa', 'Heena Kausar', 'Satish Rai', 'Ezra Mir', 'Chanda', 'Abhishek Bhatnagar', 'Meena Lalit', 'Boopathy Pandian', 'Munnawar Bhagat', 'Ojaswwee Sharma', 'Shekhar Suman', 'Chaturbhuj Doshi', 'Pawan Kumar', 'Ashima Bhalla', 'Piyush Panjuani', 'Haribhai Raghunathji Desai', 'S.S. Rajamouli', 'Siva G.', 'Geeta Bali', 'Rajshri Nair', 'Rahul Kumar Shukla', 'Baburao Patel', 'Jabir Ali', 'Sadhana Khote', 'Bn Rao', 'Dinesh Soni', 'W. Garcher', 'Nitti Kumar', 'Tapeshwar Prasad', 'Mohan Savalkar', 'Jaddanbai', 'Girish Trivedi', 'Shiva', 'Jaymala Adarsh', 'Saurabh Sinha', 'Abid', 'Tipu Khan', 'Rebecca Haimowitz', 'Mandi Burman', 'C. Raghuvir', 'Akshay Dandekar', 'Vicky Khan', 'Bandini Mishra', 'Sheshagiri Yelameli', 'Nalin Singh', 'Rahul Jain', 'Kritn Ajitesh', 'Mohit Takalkar', 'Vasant Dalal', 'Jayanta Basu', 'Dr. Vikram', 'Vishal Mahadkar', 'Sriprakash', 'Agha', 'Dev Malhotra', 'Vijay Pal', 'Rajesh Mapuskar', 'Rajesh Nahta', 'Sunita Devi', 'Charandas Shokh', 'Anadinath Bannerjee', 'Tushar Amrish Goel', 'Behram Mukadam', 'Rajiv Goswami', 'Pesi Karani', 'Sharad Babu', 'Mahipal', 'Ganpati Bohra', 'Maheshwari', 'Mohan Rathod', 'Vishwa Naidu', 'Ravindra Vyas', 'Ramnik Desai', 'Dharmendra', 'Sooraj Kumar Sharma', 'Vishnu Raaje', 'Dedipya Joshii', 'Samiran Dutta', 'Sadashivam Rao', 'Kukoo Kapoor', 'Dinesh Thakkar', 'Manno Desai', 'Cuckoo', 'Ajay Mehra', 'Naseeruddin Shah', 'Sachin Bajaj', 'Brij Gopal', 'Mohammed Hanif', 'Narendra Grewal', 'Richie Mehta', 'Ashutosh Warang', 'M.M. Baig', 'Saad Khan', 'Sudhanshu Saria', 'Satyam Raj', 'Dewashish Ghosh', 'Sriramulu Naidu S.M.', 'Dipankar Senapati', 'Pradeep Chandra', 'K.M. Multani', 'Manish R. Khandelwal', 'Manish Saini', 'Robin Chourasia', 'Anugrah Agnihotri', 'Chandramohan', 'Jaswant Jhaveri', 'Rajni Chandra', 'Chitra Shenoy', 'Miraq Mirza', 'Rajni Bala', 'Subodh Govil', 'Jayabrato Chatterjee', 'Guddu', 'Jessie Kerry', 'Indira', 'Rao C.S.R.', 'Suman Mukhopadhyay', 'D.M. Subhashish', 'Apurv Bajpai', 'Gunasekhar', 'Durga Khote', 'Jugal Raja', 'Keshav Arya', 'Alexander', 'Renny Mascarenhas', 'Shahab Shamsi', 'Vijaya Nirmala', 'Abhishek Dudhaiya', 'Satish Rajwade', 'Ram Mohan', 'Antariksh Jain', 'Randhir', 'Steven Lake', 'Pankaj Shukla', 'Shalil Kallur', 'Suraj Dev Sahu', 'Masum Ali Khan', 'Neeraj Singh', 'Avinash Dhyani', 'Sanjay Singh Negi', 'Sakthi Chidambaram', 'Shardul Maurya', 'Bindiya Mohapatra', 'Hari Prasad Reddy', 'Ramesh Deo', 'Monica Desai', 'Harshadrai Sakerlal Mehta', 'R.S. Junnarkar', 'Amrita', 'Raghunath Singh', 'Lala Yaqoob', 'Nurjahan', 'Kavi Raz', 'Ejaz Gulab', 'Hussain Sharif', 'Raja Paranjpe', 'Raaja Thakur', 'Nagma', 'Madhuri', 'Lakshmi', 'Prakash Verma', 'Raja Yagnik', 'Ashfaque Shaikh', 'Jai', 'Aashnee', 'Kaushal Bharati', 'Shilpi Dasgupta', 'Zahid Ali', 'Romi Behl', 'Gayatri', 'Raja Bhargav', 'Bobby Tappia', 'Preeti Patel', 'Sandeep Kumar Chandola', 'Yashpal Billa', 'Narsimha Raju', 'Shakeela', 'Laxman Yaara', 'Shankar Vikram', 'Jitin Rawat', 'Abhijeet Warang', 'Piyush Saxena', 'M. Gani', 'Naresh Nagpal', 'Pankaj Batra', 'James John', 'Jagdish Gautam', 'Shefali Bhushan', 'Jyoti Sarup', 'Adoor Bhasi', 'Anup Singh', 'Narayan k Sahu', 'Jay Prakash', 'Kedar Sharma', 'Partha Ganguly', 'V.N. Reddy', 'Anil Pandit', 'Abhilasha', 'R. Elavarasan', 'S.R. Puttana Kanagal', 'Khwaja Mohammad Abbas', 'Sudip Bandyopadhyay', 'Radha', 'Chitrapu Narayana Rao', 'Mayank P. Srivastava', 'M.A. Mirza', 'S.A. Ashokan', 'T.S. Mohan', 'Marco Hülser', 'Ajay Naik', 'Fuwad Khan', 'Navinchandra', 'Dharam Panesar', 'Urmila Bhatt', 'Kishan Shrikanth', 'Guddu Jaffrey', 'Ranjit Tewari', 'Om Sai Prakash', 'Bhaskar', 'Aanand Raut', 'Jyoti Kapur Das', 'Manher Desai', 'Gohar', 'Manika Sharma', 'Vishan Yadav', 'Vatsa', 'Nandamuri Balakrishna', 'Tonje Gjevjon', 'Rohit Philip', 'Saahil Prem', 'Saurab Narang', 'Vithaldas Panchotia', 'Munir Khan', 'A.M. Khan', 'Rajesh Patole', 'Bimal Rawal', 'Nagraj Manjule', 'Vimala Vasishta', 'Alok Shrivastav', 'Prithviraj Sukumaran', 'Ashok Hegde', 'Shakir Shah', 'Niharika Popli', 'Sarim Momin', 'Raja Bagla', 'Ather Khan', 'S. Prasad', 'Subhash Shah', 'Dhalapathi', 'Rakesh Mehta', 'Helen', 'Durgesh Kumar', 'Rajesh Vakil', 'Jyothi Lakshmi', 'Irshad Khan', 'Ramkumar Shedge', 'K. Ramanlal', 'Siddesh Shetty', 'S. Karnam', 'Mahaveer Shringi', 'Kunaal Roy Kapur', 'Ravi Sharma Shankar', 'Rakesh Saraiya', 'Girish Juneja', "Genelia D'Souza", 'K Sukumar', 'Gaurav Goswami', 'Bidisha Adhikari', 'K. Kamleshwar Rao', 'Rajkumari', 'G.P. Pawar', 'Mandeep Kumar', 'Sudarshan Bhatia', 'Moti Sagar', 'Gulab', 'Vikas Indra', 'Devraj', 'Durba Sahay', 'Ghulam Rasool', 'Deepak Tanwar', 'Monu Dhankad', 'Arun Chandu', 'Sayaji Shinde', 'Manav Wadhwa', 'Gianpaolo Bigoli', 'Sajith Bahaskar', 'Anil Dhoble', 'R.S. Vimal', 'Anamika', 'Datta Dharmadhikari', 'Balwant Singh Suri', 'Nishit Jadawala', 'Sundarrao Nadkarni', 'Sumit Sagar', 'M. Bhagwandas', 'Param Tomanec', 'Rajesh Harivansh Mishra', 'Bela Negi', 'Jambuwant Rao Dhote', 'Altaf', 'Rohena Gera', 'S.F. Hasnain', 'Narottam Vyas', 'Mama', 'Dinkar Kapoor', 'Jagdev Bhambri', 'Raju Parsekar', 'Rafat Abbas Ali', 'Laxmikant Shetgaonkar', 'Harvinder Pal', 'Arvinda Jaiswal', 'C.L. Dheer', 'Sunil Sanjan', 'Vibhu Puri', 'Banty Dubey', 'Ranjan Mistry', 'Murali', 'Kishore Kapoor', 'Himmat Dave', 'Ravi Chakkravarthy', 'Meera Jasmine', 'Mukhtar Begum', 'Sam Bhattacharjee', 'Bindu', 'Kamal Sharma', 'Jayant', 'Surya Kumar', 'Sadhu Singh', 'Kamdar', 'Lucky Ram', 'Yogesh', 'Nagaraja Kote', 'Sandeep Kumar Rana', 'Mohanlal', 'S.M. Ibrahim', 'Pralhad Keshav Atre', 'M.C. Kapoor', 'Ram Devineni', 'Abhishek Paul', 'Swabri Abdulkhader Khasim', 'Andrzej Bartkowiak', 'Amin Prakash', 'Robert Sigl', 'Aditya Bharadwaj', 'Abhik Bhanu', 'Mohan Choti', 'James Ivory', 'Robert Johnson', 'Sarita Khurana', 'B.S. Glaad', 'Udayshankar', 'Abdul Qayyoom Khan', 'Amit Chandra Sahay', 'Avinash Abhimanyu', 'Yudhishthir Singh', 'Andreas Koefoed', 'Devi Dutt', 'Priya Jhavar', 'Bijoy Banerjee', 'Y H Rizvi', 'Mahendra Pran', 'Gopi Sapru', 'J.S. Keshwani', 'Meenal Dixit', 'Dulari', 'Daud Chand', 'Dharam', 'S. Khalil', 'Shomshuklla Das', 'Sibtain Fazli', 'Samarjit Dasgupta', 'Arun Kaul', 'Anuj Pandey', 'Nikhil Bhat', 'Rahul Tiwari', 'Faizal Khan', 'D.S. Sultania', 'Hameeda Bano', 'Kompin Kemgumnird', 'Poonam Dasgupta', 'Kamal Chandra', 'Shome Makhija', 'Puneet Issar', 'Laurie Colson', 'Prakash Arora', 'Vicky Tejwani', 'Lovely Singh', 'Amala Akkineni', 'Ashok Neemrania', 'Vipin k Sethie', 'Shakti Kapoor', 'Rupa Sain', 'Jayadevan', 'Jayalalitha J', 'Binu Bhaskar', 'Naresh Gupta', 'Shambhu Purohit', 'Girish Manukant', 'Vijay Ratnakar Gutte', 'Hercules', 'Pankaj Purohit', 'Megan Doneman', 'Parveen Dagar', 'Diljeet Bassi', 'Aditi Roy', 'Dayanand Rajan', 'Bhabhani Prasad', 'Mahesh Shandilya', 'Ram Dayal', 'Bjarney Lúðvíksdóttir', 'Qamar Narvi', 'Kulbir Badesron', 'Viji P. Nair', 'Mona Ambegaonkar', 'Vikramjit Singh', 'Sawrabh Singh Bedi', 'Dharmendra Shekhar Ojha', 'Madhu Patel', 'R.A. Shankar', 'Sandeep Tiwari', 'Dhruva Harsh', 'Dada Mirasi', 'Rajeev Raj', 'Hasrat Lucknowi', 'Akhilesh Kumar Upadhyay', 'Saroja Devi B.', 'Babubhai Patel', 'G.R. Sethi', 'Keshavrao Date', 'P. Subramaniam', 'Ishwarlal', 'Jody Hassett Sanchez', 'Nicholas Meyer', 'Mukesh Saigal', 'Dil Kumar', 'Ghaznavi', 'Ajit', 'Manish Acharya', 'Dileep', 'Masood Ali', 'Shekhar Ramesh Mishra', 'Raksha Mehta', 'Aziz Chhapra', 'Thomas Jacob', 'Mangesh Hadawale', 'Kesari', 'Atul Garg', 'P. Chandra Kumar', 'Akshay Roy', 'Sameer Sharma', 'Afnan Amrohi', 'Nazir Ajmeri', 'Munindra Gupta', 'Durvesh Arya', 'Kalairani', 'Sanjeevani Gupta', 'Pallavi Joshi', 'Mitra Sen', 'Ranjit Kapoor', 'Ram Prakash', 'Chiranjeevi', 'Ajay Arak', 'Pushkar Jog', 'Trilok Kapoor', 'Nanubhai Vakil', 'Rajkumar', 'Harryy W. Fernaandes', 'Jaiveer', 'Kulbhushan', 'Manmohan Krishna', 'Khushboo Ranka', 'Ashok G.', 'Ovais Khan', 'Ramnik Vaidya', 'Ankur Pajni', 'Thakur Tapasvi', 'Saki Shah', 'P.S. Murthy', 'K.S.L. Swamy', 'Suruchi Sharma', 'Sooni Taraporevala', 'K. Babuji', 'Akkineni Kutumba Rao', 'O.P. Dutta', 'Azhagar Malai', 'Ajit Pal Mangat', 'J. Nutan Punkaj', 'Vinil Mathew', 'Jagdish Kumar Nirmal', 'Umar Sharif', 'Andaleeb Sultanpuri', 'Diwakar Naik', 'Manoj Giri', 'Dinesh Chaudhary', 'Reena Kukreja', 'Jayaprakash Radhakrishnan', 'Jagdish Sethi', 'Kalabhavan Mani', 'Rasool Ellore', 'Rajan Lyallppuri', 'Amit Rai', 'Sanjay Goel', 'Arun Nalawade', 'S.M. Iqbal', 'Ajay Kanchan', 'Desari Arun Kumar', 'Ashish Bhatia', 'Rajesh Rathi', 'Mehran Amrohi', 'Harish Sharma', 'Vm Gunjal', 'Nitin Mahadar', 'Tushar Hiranandani', 'Vickrant Mahajan', 'Kislay', 'Bhanuchander', 'Prakash Inamdar', 'Bishnu Dev Halder', 'Aziz Khan', 'Babu Bhai Jani', 'Raja Sandow P.K.', 'Anand Bhat', 'Nirbhay Shankar', 'Ashish Yadav', 'Aji Mathai', 'Renju Ramesh', 'A.K. Hangal', 'Lalit Vachani', 'Manoj Srivastava', 'Jagdish Kadar', 'Fazil', 'Kant J. Parmar', 'Ramesh Mehta', 'Kommineni', 'Zaheer D. Lari', 'Deepika Padukone', 'Dharmendra Baghel', 'Rajnish Bahl', 'Prashast Singh', 'Anupam Sinha', 'Mehrish', 'A.R. Kardar', 'Rishi Prakash Mishra', 'Allyson Patel', 'Gautami', 'Hk Verma', 'Mustajab Malik', 'Pradeep C. Shetty', 'Anupam Kher', 'Rajan Kumar Patel', 'Vicky S. Kumar', 'Premji', 'Inder Raj Anand', 'Hemwant Tiwari', 'Vinod Desai', 'Anand Bhaskar-Rao', 'Kritika Sachdeva', 'Brahmanandam', 'Ashfaq Makrani', 'Sharrma Mayyank', 'Amarnath', 'Bharat Bhushan', 'Shivaji Ganesan', 'Hriday Sharma', 'Khader Abdul', 'Akbar Khan', 'Krishna Sethi', 'Umar Khan', 'Anuradha', 'Kevin Schmutzler', 'Nari Ghadiali', 'Lom Harsh', 'Faruq Masudi', 'Vipin Sharma', 'Birendra', 'Mohini', 'Dhiraj Kumar', 'Soujan Josseph', 'Sahila Chaddha', 'Ramesh Gupta', 'Subba Rao B.A.', 'S.B. Nayampalli', 'Murali Nair', 'Rashid Khan', 'Kishore Anand Bhanushali', 'Arshad Sayed', 'Anu Dhawan', 'Zohra', 'Devaraj', 'Soniya Singh', 'Abir Sengupta', 'Jugraj', 'Pradeep Kumar', 'Rajesh Sabharwal', 'Raj Babu', 'Dinkar D. Patil', 'Niren Lahiri', 'Shiva Shakti Datta', 'Mohan Agashe', 'Maria Sigrist', 'Prabhu Rathod', 'Yogesh Dubey', 'Raghuvaran', 'Hersh Kinnu', 'Karthik G. Krish', 'Madhu Apte', 'Sadhana', 'Shantanu Bagchi', 'Adarsh', 'P.D. Shenoy', 'Raju Chouhan', 'Nusrat Sayyed', 'Rahul Sharma', 'Ashish Kumar', 'Chandulal Shah', 'Harbans', 'Dharan Mandrayar', 'G.T.B. Harvey', 'A.H. Essa', 'Ss Arora', 'Eddie Billimoria', 'Himanshu Bhatt', 'Amit r Agarwal', 'Alokk Srivastava', 'Ajit Asthana', 'Mainak Dhar', 'Rana Jaiswal', 'Manu Rewal', 'Omi Bedi', 'Jaggi Rampal', 'Raju Khan'] in column 0 during transform

In [None]:
# Evaluating the model
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"R-squared: {r2}")