In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [9]:

# Canada Per Capita Income Prediction
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from google.colab import files

uploaded = files.upload()

data = pd.read_csv(next(iter(uploaded)))

print("First 5 rows:")
print(data.head())

print("\nDataset Info:")
print(data.info())

print("\nNull values:")
print(data.isnull().sum())

X = data[['year']]
y = data['per capita income (US$)']


model = LinearRegression()
model.fit(X, y)

predicted_income = model.predict([[2020]])

print("\nPredicted Per Capita Income in 2020:")
print(predicted_income[0])

Saving canada_per_capita_income.csv to canada_per_capita_income (2).csv
First 5 rows:
   year  per capita income (US$)
0  1970              3399.299037
1  1971              3768.297935
2  1972              4251.175484
3  1973              4804.463248
4  1974              5576.514583

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47 entries, 0 to 46
Data columns (total 2 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   year                     47 non-null     int64  
 1   per capita income (US$)  47 non-null     float64
dtypes: float64(1), int64(1)
memory usage: 884.0 bytes
None

Null values:
year                       0
per capita income (US$)    0
dtype: int64

Predicted Per Capita Income in 2020:
41288.69409441762




In [11]:

# Salary Prediction (Fixed Version)

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from google.colab import files

e
uploaded = files.upload()


data = pd.read_csv(next(iter(uploaded)))


print("Before Cleaning:")
print(data.info())


data = data.dropna()

print("\nAfter Cleaning:")
print(data.info())


X = data[['YearsExperience']]
y = data['Salary']

model = LinearRegression()
model.fit(X, y)

predicted_salary = model.predict([[12]])

print("\nPredicted Salary for 12 years experience:")
print(predicted_salary[0])

Saving salary.csv to salary (2).csv
Before Cleaning:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   YearsExperience  28 non-null     float64
 1   Salary           30 non-null     int64  
dtypes: float64(1), int64(1)
memory usage: 612.0 bytes
None

After Cleaning:
<class 'pandas.core.frame.DataFrame'>
Index: 28 entries, 0 to 29
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   YearsExperience  28 non-null     float64
 1   Salary           28 non-null     int64  
dtypes: float64(1), int64(1)
memory usage: 672.0 bytes
None

Predicted Salary for 12 years experience:
139049.6749539778




In [13]:

# Multiple Linear Regression - Hiring (Corrected)


import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from google.colab import files


uploaded = files.upload()


data = pd.read_csv(next(iter(uploaded)))

print("Original Data:")
print(data)


data['experience'] = data['experience'].replace({
    'zero':0, 'one':1, 'two':2, 'three':3,
    'four':4, 'five':5, 'six':6,
    'seven':7, 'eight':8, 'nine':9,
    'ten':10, 'eleven':11, 'twelve':12
})

data['experience'] = pd.to_numeric(data['experience'], errors='coerce')


data.fillna(data.mean(), inplace=True)

print("\nCleaned Data:")
print(data)


X = data[['experience',
          'test_score(out of 10)',
          'interview_score(out of 10)']]

y = data['salary($)']


model = LinearRegression()
model.fit(X, y)


salary1 = model.predict([[2, 9, 6]])
salary2 = model.predict([[12, 10, 10]])

print("\nPredicted Salary for (2 yr, 9 test, 6 interview):", salary1[0])
print("Predicted Salary for (12 yr, 10 test, 10 interview):", salary2[0])

Saving hiring.csv to hiring (1).csv
Original Data:
  experience  test_score(out of 10)  interview_score(out of 10)  salary($)
0        NaN                    8.0                           9      50000
1        NaN                    8.0                           6      45000
2       five                    6.0                           7      60000
3        two                   10.0                          10      65000
4      seven                    9.0                           6      70000
5      three                    7.0                          10      62000
6        ten                    NaN                           7      72000
7     eleven                    7.0                           8      80000

Cleaned Data:
   experience  test_score(out of 10)  interview_score(out of 10)  salary($)
0    6.333333               8.000000                           9      50000
1    6.333333               8.000000                           6      45000
2    5.000000               6.0

  data['experience'] = data['experience'].replace({


In [15]:

# Multiple Linear Regression - 1000 Companies (Corrected)


import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from google.colab import files

uploaded = files.upload()

data = pd.read_csv(next(iter(uploaded)))

print("Original Data:")
print(data.head())

X = data[['R&D Spend', 'Administration', 'Marketing Spend', 'State']]
y = data['Profit']

ct = ColumnTransformer(
    transformers=[
        ('encoder', OneHotEncoder(drop='first'), ['State'])
    ],
    remainder='passthrough'
)

model = Pipeline(steps=[
    ('preprocessor', ct),
    ('regressor', LinearRegression())
])

model.fit(X, y)


new_data = pd.DataFrame({
    'R&D Spend': [91694.48],
    'Administration': [515841.3],
    'Marketing Spend': [11931.24],
    'State': ['Florida']
})


profit = model.predict(new_data)

print("\nPredicted Profit:", profit[0])

Saving 1000_Companies.csv to 1000_Companies (1).csv
Original Data:
   R&D Spend  Administration  Marketing Spend       State     Profit
0  165349.20       136897.80        471784.10    New York  192261.83
1  162597.70       151377.59        443898.53  California  191792.06
2  153441.51       101145.55        407934.54     Florida  191050.39
3  144372.41       118671.85        383199.62    New York  182901.99
4  142107.34        91391.77        366168.42     Florida  166187.94

Predicted Profit: 510570.9926108249
