#Date          :30-10-2022
#Team ID       :PNT2022TMID46351 
#Project name  :Retail Store Stock Inventory Analytics

**1.Loading the dataset into the tool**

In [None]:
# Prompt for a file upload through Colab's `files` helper. A later cell reads
# "abalone.csv", so that is presumably the file expected here.
# `uploaded` keeps whatever files.upload() returns; no visible cell reads it.
from google.colab import files
uploaded = files.upload()

In [None]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings 
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation/FutureWarnings from pandas,
# seaborn and scikit-learn, which makes version-related breakage harder to spot.
warnings.filterwarnings('ignore')

In [None]:
# Read the dataset into a DataFrame; the relative path means the CSV must be
# in the current working directory.
data = pd.read_csv("abalone.csv")

**2.Performing Visualization**

**Univariate, Bivariate and Multivariate Analysis**

In [None]:
data.head()

In [None]:
sns.boxplot(data['Diameter'])

In [None]:
plt.hist(data['Diameter'])

In [None]:
plt.plot(data['Diameter'].head(10))

In [None]:
plt.pie(data['Diameter'].head(),autopct='%.3f')

In [None]:
# Distribution of the first 300 Diameter values: density-scaled histogram with
# a KDE curve on top. `sns.distplot` is deprecated, so the `histplot`
# equivalent recommended by seaborn is used instead.
sns.histplot(x=data['Diameter'].head(300), kde=True, stat='density')

In [None]:
# Scatter plot of Length against Diameter for the first 400 rows.
first_400 = data.head(400)
plt.scatter(first_400['Diameter'], first_400['Length'])

In [None]:
# Bar chart of Rings by Sex for the first 20 rows. Rows that share a Sex value
# are drawn at the same x position, so their bars overlap.
plt.bar(data['Sex'].head(20),data['Rings'].head(20))
plt.title('Bar plot')
# The x axis shows the Sex column, so label it accordingly (it was 'Diameter').
plt.xlabel('Sex')
plt.ylabel('Rings')

In [None]:
# Mean Rings per Sex category (seaborn's default estimator) over the full
# dataset. x/y are given by keyword: seaborn >= 0.12 rejects them positionally.
sns.barplot(x='Sex', y='Rings', data=data)

In [None]:
# Joint distribution of Diameter and Rings. The original passed 50 Diameter
# values against 100 Rings values; only the first 50 rows have both, so the
# same 50-row sample is used for both axes. Keyword arguments are required by
# seaborn >= 0.12.
sns.jointplot(x='Diameter', y='Rings', data=data.head(50))

In [None]:
# Rings by Diameter for the first five rows, split by Sex.
# x/y must be keywords on seaborn >= 0.12 (positional use raises TypeError).
sns.barplot(x='Diameter', y='Rings', hue='Sex', data=data.head())

In [None]:
# Line plot of Rings against Diameter for the first five rows.
# x/y must be keywords on seaborn >= 0.12 (positional use raises TypeError).
sns.lineplot(x='Diameter', y='Rings', data=data.head())

In [None]:
# Box plot of Diameter per Sex for the first 10 rows, with Rings as the hue.
# The three positional series are mapped to the x / y / hue keywords they
# occupied on seaborn 0.11; seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x='Sex', y='Diameter', hue='Rings', data=data.head(10))

In [None]:
# Annotated correlation heatmap for the first five rows.
# 'Sex' still holds strings at this point, and pandas >= 2.0 raises instead of
# silently dropping non-numeric columns in corr(), so restrict the frame to
# numeric columns explicitly (this works on older pandas as well).
fig = plt.figure(figsize=(8, 5))
numeric_head = data.head().select_dtypes(include='number')
sns.heatmap(numeric_head.corr(), annot=True)

In [None]:
# Pairwise plot grid over the first five rows, coloured by the Height value.
sns.pairplot(data.head(),hue='Height')

In [None]:
# Same grid without a hue variable.
sns.pairplot(data.head())

**3.Perform Descriptive Statistics on the dataset**

In [None]:
# First five rows.
data.head()

In [None]:
# Last five rows.
data.tail()

In [None]:
# Column dtypes, non-null counts and memory usage.
data.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe()

In [None]:
# Most frequent value(s) of every column, transposed so each column becomes a row.
data.mode().T

In [None]:
# (number of rows, number of columns).
data.shape

In [None]:
data.kurt()

In [None]:
data.skew()

In [None]:
data.var()

In [None]:
data.nunique()

**4.Check for missing values and deal with them**

In [None]:
# Boolean frame marking every missing cell.
data.isna()

In [None]:
# Per column: does it contain at least one missing value?
data.isna().any()

In [None]:
# Per column: how many values are missing?
data.isna().sum()

In [None]:
# Number of columns that contain at least one missing value.
data.isna().any().sum()

**5.Find the outliers and replace them**

In [None]:
sns.boxplot(data['Diameter'])

In [None]:
quant=data.quantile(q=[0.25,0.75])
quant

In [None]:
iqr=quant.loc[0.75]-quant.loc[0.25]
iqr

In [None]:
low=quant.loc[0.25]-(1.5*iqr)
low

In [None]:
up=quant.loc[0.75]+(1.5*iqr)
up

In [None]:
# Replace Diameter values below 0.155 with 0.4078, then redraw the box plot to
# check the result.
# NOTE(review): both constants are hard-coded; presumably a lower fence and a
# central value derived from the cells above — confirm.
data['Diameter']=np.where(data['Diameter']<0.155,0.4078,data['Diameter'])
# `x=` avoids the version-dependent meaning of a positional series in seaborn.
sns.boxplot(x=data['Diameter'])

In [None]:
sns.boxplot(data['Length'])

In [None]:
data['Length']=np.where(data['Length']<0.23,0.52, data['Length'])
sns.boxplot(data['Length'])

In [None]:
sns.boxplot(data['Height'])

In [None]:
data['Height']=np.where(data['Height']<0.04,0.139, data['Height'])
data['Height']=np.where(data['Height']>0.23,0.139, data['Height'])
sns.boxplot(data['Height'])

In [None]:
sns.boxplot(data['Whole weight'])

In [None]:
data['Whole weight']=np.where(data['Whole weight']>0.9,0.82, data['Whole weight'])
sns.boxplot(data['Whole weight'])

In [None]:
sns.boxplot(data['Shucked weight'])

In [None]:
data['Shucked weight']=np.where(data['Shucked weight']>0.93,0.35, data['Shucked weight'])
sns.boxplot(data['Shucked weight'])

In [None]:
sns.boxplot(data['Viscera weight'])

In [None]:
data['Viscera weight']=np.where(data['Viscera weight']>0.46,0.18, data['Viscera weight'])
sns.boxplot(data['Viscera weight'])

In [None]:
sns.boxplot(data['Shell weight'])

In [None]:
data['Shell weight']=np.where(data['Shell weight']>0.61,0.2388, data['Shell weight'])
sns.boxplot(data['Shell weight'])

**6.Check for Categorical columns and perform encoding.**

In [None]:
# Encode the categorical Sex column as integers (M -> 1, F -> 0, I -> 2).
# The result is assigned back instead of calling replace(..., inplace=True) on
# the column selection: that chained in-place form is deprecated in recent
# pandas and does not modify `data` once copy-on-write is enabled.
# astype('int64') pins the integer dtype and fails loudly if an unmapped
# category is left in the column.
data['Sex'] = data['Sex'].replace({'M': 1, 'F': 0, 'I': 2}).astype('int64')
data

**7.Split the data into dependent and independent variables.**

In [None]:
x=data.drop(columns= ['Rings'])
y=data['Rings']
x

In [None]:
y

**8.Scale the independent variables**

In [None]:
from sklearn.preprocessing import scale
# Standardise the feature columns with sklearn's `scale`. `x` is rebound to
# the returned array, so the DataFrame's column names are no longer attached.
# NOTE(review): scaling happens before the train/test split in the next
# section, so the scaling statistics are computed over rows that later end up
# in the test set.
x = scale(x)
x

**9.Split the data into training and testing**

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing. A fixed random_state makes the split,
# and therefore every metric reported below, reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)
print(x_train.shape, x_test.shape)

**10.Build the Model**

In [None]:
from sklearn.linear_model import LinearRegression
# Ordinary least-squares linear regression with default settings.
MLR=LinearRegression()

**11.Train the model**

In [None]:
# Fit the regression on the training split.
MLR.fit(x_train,y_train)

**12.Test the model**

In [None]:
# Predicted Rings for the held-out test features.
y_pred=MLR.predict(x_test)
y_pred

In [None]:
# Predicted Rings for the training features (displayed only; no later visible
# cell reads `pred`).
pred=MLR.predict(x_train)
pred

In [None]:
from sklearn.metrics import r2_score
# NOTE: despite the variable name, this is the R^2 score on the test split,
# not a classification accuracy.
accuracy=r2_score(y_test,y_pred)
accuracy

In [None]:
# Predict Rings for one hand-entered sample (column order as in
# data.drop(columns=['Rings']); the leading 1 is the encoded Sex value 'M').
# The model was trained on standardised features, so the raw values must be
# standardised the same way first: subtract the column mean and divide by the
# population standard deviation (ddof=0), which is what sklearn's `scale` uses.
# `x` was overwritten by the scaled array, so the statistics are recomputed
# from `data`, which has not changed since the scaling step.
feature_frame = data.drop(columns=['Rings'])
sample = np.array([[1, 0.455, 0.365, 0.095, 0.5140, 0.2245, 0.1010, 0.150]])
sample_scaled = (
    (sample - feature_frame.mean().to_numpy())
    / feature_frame.std(ddof=0).to_numpy()
)
MLR.predict(sample_scaled)

**13.Measure the performance using Metrics**

In [None]:
from sklearn import metrics
from sklearn.metrics import mean_squared_error
# Root-mean-squared error of the linear-regression predictions on the test split.
np.sqrt(mean_squared_error(y_test,y_pred))

**LASSO**

In [None]:
from sklearn.linear_model import Lasso, Ridge
# Initialise the model. The `normalize` argument was deprecated in
# scikit-learn 1.0 and removed in 1.2 (passing it raises TypeError); the
# features are already standardised in the scaling step, so it is dropped.
# NOTE: this changes the effective regularisation strength compared with the
# old normalize=True runs; scikit-learn's deprecation notes describe how to
# rescale alpha if those numbers must be reproduced.
lso = Lasso(alpha=0.01)
# Fit the model on the training split.
lso.fit(x_train, y_train)
# Predict on the test data.
lso_pred = lso.predict(x_test)
# Fitted coefficients, one per feature column.
coef = lso.coef_
coef

In [None]:
from sklearn import metrics
from sklearn.metrics import mean_squared_error
# R^2 of the Lasso predictions on the test split.
metrics.r2_score(y_test,lso_pred)

In [None]:
# Root-mean-squared error of the Lasso predictions on the test split.
np.sqrt(mean_squared_error(y_test,lso_pred))

**RIDGE**

In [None]:
#initialising model
rg=Ridge(alpha=0.01,normalize=True)
#fit the model
rg.fit(x_train,y_train)
Ridge(alpha=0.01, normalize=True)
#prediction
rg_pred=rg.predict(x_test)
rg_pred

In [None]:
# Fitted Ridge coefficients, one per feature column.
rg.coef_

In [None]:
# R^2 of the Ridge predictions on the test split.
metrics.r2_score(y_test,rg_pred)

In [None]:
# Root-mean-squared error of the Ridge predictions on the test split.
np.sqrt(mean_squared_error(y_test,rg_pred))