## AI/ML Internship

### Task 3: Linear Regression

#### 1. Importing Necessary Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

#### 2. Load and Preview Dataset

In [2]:
# Read the passenger table from the CSV file in the working directory.
titanic_data = pd.read_csv(filepath_or_buffer="Titanic-Dataset.csv")
# Print the first five rows to check the columns and some sample values.
print(titanic_data.head(n=5))

   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85        C  
2      0  STON/O2. 3101282   7.9250   NaN        S  
3      0            113803  53.1000  C123        S  
4      0            373450   8.0500   NaN        S  


#### 3. Preprocessing: Handle missing values, select numeric features

In [3]:
# Keep only the numeric columns used by the model: the target (Fare)
# followed by the four predictor columns.
numeric_columns = ['Fare', 'Pclass', 'Age', 'SibSp', 'Parch']
titanic_data = titanic_data[numeric_columns]

In [4]:
# Drop rows with missing 'Age' or 'Fare'.
# Reassign the result instead of using inplace=True: the frame comes from a
# column selection, so an in-place drop can raise SettingWithCopyWarning, and
# a non-mutating call keeps this cell safe to re-run.
titanic_data = titanic_data.dropna(subset=['Fare', 'Age'])

#### 4. Split Features and Target

In [6]:
# Predictor columns (features) and the column to be predicted (target).
feature_columns = ['Pclass', 'Age', 'SibSp', 'Parch']
target_column = 'Fare'

X = titanic_data[feature_columns]
y = titanic_data[target_column]

#### 5. Train-Test Split

In [7]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

#### 6. Fit Linear Regression Model

In [8]:
# Ordinary least-squares linear regression with default settings,
# fitted on the training split only.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

#### 7. Predictions

In [9]:
# Predicted fares for the held-out test rows.
y_pred = model.predict(X=X_test)

#### 8. Evaluation Metrics

In [10]:
# Score the test-set predictions against the true fares.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Build the report first, then emit it with a single print call.
report_lines = [
    "\nModel Evaluation:",
    f"Mean Absolute Error (MAE): {mae:.2f}",
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"R² Score: {r2:.2f}",
]
print("\n".join(report_lines))


Model Evaluation:
Mean Absolute Error (MAE): 25.18
Mean Squared Error (MSE): 3424.57
R² Score: 0.18


#### 9. Coefficients

In [11]:
# Pair each feature name with its fitted weight, one formatted line per feature.
coefficient_lines = [
    f"{name}: {weight:.2f}" for name, weight in zip(X.columns, model.coef_)
]

print("\nModel Coefficients:")
print(*coefficient_lines, sep="\n")
print(f"Intercept: {model.intercept_:.2f}")


Model Coefficients:
Pclass: -37.10
Age: -0.18
SibSp: 6.60
Parch: 12.16
Intercept: 114.30


#### 10. Plotting Age vs Fare with Regression Line

# The model uses four features, so plotting its raw test-set predictions
# against Age does not give a line: the predictions also vary with Pclass,
# SibSp and Parch, and sns.lineplot would average repeated Age values and add
# a confidence band. Instead, sweep Age over its observed test range while
# holding every other feature at its test-set mean, which shows the model's
# straight-line Age effect.
age_grid = np.linspace(X_test['Age'].min(), X_test['Age'].max(), 100)
X_line = pd.DataFrame({
    col: (age_grid if col == 'Age' else np.full(age_grid.shape, X_test[col].mean()))
    for col in X_test.columns  # keep the column order the model was fitted with
})

fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x=X_test['Age'], y=y_test, label="Actual", alpha=0.6, ax=ax)
ax.plot(age_grid, model.predict(X_line), color='red', label="Predicted (Line)")
ax.set_title("Age vs Fare with Regression Line")
ax.set_xlabel("Age")
ax.set_ylabel("Fare")
ax.legend()
plt.show()

### Conclusion

**The linear regression model estimates Titanic ticket fares from features such as passenger class and age, but with limited accuracy (R² ≈ 0.18, MAE ≈ 25.18 on the test set). Passenger class has by far the largest fitted effect on fare (coefficient ≈ −37.10), whereas age has only a small one (≈ −0.18 per year). Although the model captures broad trends, its performance indicates that the relationships are likely more complex and require more advanced models or additional features to make better estimates.**