# Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import scale

# Loading the Dataset

In [None]:
# Read the abalone data from a CSV file given by a relative path.
df = pd.read_csv("abalone.csv")

In [None]:
# Print the DataFrame summary produced by pandas' `DataFrame.info`.
df.info()

# Univariate analysis

In [None]:
# Summary statistics from pandas' `DataFrame.describe`.
df.describe()

In [None]:
# Summary statistics restricted to the Length and Rings columns.
df.loc[:, ["Length", "Rings"]].describe()

In [None]:
# Overlaid histograms of Length and Rings.
df[["Length", "Rings"]].plot.hist()

In [None]:
# Distribution plot of the Length and Rings columns.
sns.displot(data=df[["Length", "Rings"]])

# Bivariate analysis

In [None]:
# One 50-bin histogram per column on a 30x20 figure.
df.hist(bins=50,figsize=(30,20));

In [None]:
# Scatter plot of Rings against Length.
df.plot(kind="scatter", x="Length", y="Rings")

In [None]:
# Bar chart with Length as the bar positions and Rings as the bar heights.
plt.bar(df["Length"], df["Rings"])

# Multivariate analysis

In [None]:
# Annotated heatmap of pairwise correlations.
# Correlation is only defined for numeric data, so restrict to numeric columns
# first; calling `corr()` on a frame that still holds string columns raises in
# recent pandas versions.
sns.heatmap(df.select_dtypes(include="number").corr(), annot=True)

# Descriptive Statistics 

In [None]:
# Print the DataFrame summary produced by pandas' `DataFrame.info`.
df.info()

In [None]:
# Column means of Length and Rings.
df.loc[:, ["Length", "Rings"]].mean()

In [None]:
# Column medians of Length and Rings.
df.loc[:, ["Length", "Rings"]].median()

In [None]:
# Column standard deviations of Length and Rings.
df.loc[:, ["Length", "Rings"]].std()

# Handle the Missing values

In [None]:
# (rows, columns) of the loaded data.
df.shape

In [None]:
# Number of missing values in each column.
df.isna().sum()

# Find and Replace Outliers

In [None]:
# Box plot of Length to eyeball outliers.
sns.boxplot(data=df, x="Length")

In [None]:
# IQR-based outlier fences for the Length column.
# NOTE: despite its name, `q2` holds the 25th percentile (first quartile),
# not the median.
q2 = df.Length.quantile(0.25)  # first quartile
q3 = df.Length.quantile(0.75)  # third quartile
IQR = q3 - q2  # interquartile range
upper_limit = q3+1.5*IQR  # upper fence: Q3 + 1.5 * IQR
lower_limit = q2-1.5*IQR  # lower fence: Q1 - 1.5 * IQR

In [None]:
# Show the IQR fences for Length, then the first rows of the column.
print(lower_limit)
print(upper_limit)
# `head` has to be called; the bare attribute is just the bound method object.
df["Length"].head()

In [None]:
# Cap low Length outliers at the lower IQR fence itself, so the replacement
# value stays inside the accepted range instead of being an arbitrary
# constant unrelated to the fences computed for this column.
df["Length"] = np.where(df["Length"] < lower_limit, lower_limit, df["Length"])

In [None]:
# Low outliers were already replaced in the previous cell, so repeating the
# replacement here would be a no-op; just visualise the resulting distribution.
sns.violinplot(x=df["Length"])

In [None]:
# Box plot of Rings to eyeball outliers.
sns.boxplot(data=df, x="Rings")

In [None]:
def find_outliers(df):
    """Return the entries of *df* lying outside the 1.5 * IQR fences.

    Args:
        df: A pandas Series of numeric values (anything supporting
            ``quantile``, comparison and boolean-mask indexing works).

    Returns:
        The subset of *df* whose values are below ``Q1 - 1.5 * IQR`` or
        above ``Q3 + 1.5 * IQR``, with the original index preserved.
    """
    q1 = df.quantile(0.25)
    q3 = df.quantile(0.75)
    # IQR must be Q3 - Q1 (non-negative); with the sign flipped the two
    # fences swap sides and almost every value is reported as an outlier.
    iqr = q3 - q1
    lower_fence = q1 - 1.5 * iqr
    upper_fence = q3 + 1.5 * iqr
    outliers = df[(df < lower_fence) | (df > upper_fence)]
    return outliers

In [None]:
# Collect the Rings outliers and report how many there are and their range.
outliers = find_outliers(df["Rings"])
print("number of outliers:", len(outliers), sep="")
print("max outlier value:", outliers.max(), sep="")
print("min outlier value:", outliers.min(), sep="")
outliers

In [None]:
# Rings needs its own IQR fence: `lower_limit` was computed from the Length
# column and is on a different scale, so comparing Rings against it is wrong.
rings_q1 = df["Rings"].quantile(0.25)
rings_q3 = df["Rings"].quantile(0.75)
rings_lower_limit = rings_q1 - 1.5 * (rings_q3 - rings_q1)
# Cap low outliers at the fence rather than substituting an unrelated constant.
df["Rings"] = np.where(df["Rings"] < rings_lower_limit, rings_lower_limit, df["Rings"])

In [None]:
# Low outliers were already handled in the previous cell, so repeating the
# replacement here would be a no-op; just visualise the resulting distribution.
sns.violinplot(x=df["Rings"])

# Check for Categorical columns and perform encoding

In [None]:
# Label-encode the categorical columns on a copy, leaving `df` untouched.
df_1 = df.copy()
labelencoder = LabelEncoder()
# Only non-numeric columns need encoding. Numeric columns such as Length and
# Rings are left as they are (encoding Rings into Length would destroy Length).
categorical_cols = df_1.select_dtypes(exclude="number").columns
for col in categorical_cols:
    # The encoder is refit per column, so it only remembers the last mapping.
    df_1[col] = labelencoder.fit_transform(df_1[col])
df_1

# Split the data into dependent and independent variables

In [None]:
# Dependent variable: the Length column.
# NOTE(review): this reads from `df`, not from the encoded copy `df_1` — confirm that is intended.
y = df["Length"]
y

In [None]:
# Independent variables: every column except Length.
X = df.drop(columns=["Length"])
X

# Scale the independent variables

In [None]:
# Take the columns at positions 1-3 (selected by position, not by name) as a NumPy array.
# NOTE(review): this rebinds `scale`, shadowing the `scale` function imported
# from sklearn.preprocessing at the top of the file.
# NOTE(review): the slice comes from `df`, not from `X` — confirm these are the
# intended independent variables.
scale=df.iloc[:,1:4].values
print("\n Original values:\n\n",scale)

In [None]:
# Rescale each column of `scale` into the [0, 1] range with MinMaxScaler.
from sklearn import preprocessing
min_max_scale=preprocessing.MinMaxScaler(feature_range=(0,1))
new_scale=min_max_scale.fit_transform(scale)
print("'\n Values After minmax scaling \n\n'",new_scale)

In [None]:
# Standardise the same input with StandardScaler.
# This rebinds `new_scale`, so the min-max result is no longer reachable under that name.
Standardisation=preprocessing.StandardScaler()
new_scale=Standardisation.fit_transform(scale)
print("\n\n values after std : \n\n",new_scale)

# Split the data into training and testing

In [None]:
from sklearn.model_selection import train_test_split 

In [None]:
# Fraction of rows assigned to the training split (10%).
train_size=0.10

In [None]:
# Target is Length; the features are an independent copy of every other column.
y1 = df["Length"]
x1 = df.drop("Length", axis=1).copy()

In [None]:
# First split: carve off the training set; the remainder is split again below.
# Uses the `train_size` variable set earlier instead of repeating the literal,
# so the fraction is defined in one place.
x1_train, x1_rem, y1_train, y1_rem = train_test_split(x1, y1, train_size=train_size)

In [None]:
# Second split: divide the remainder between validation and test sets.
test_size = 0.5
# Pass the variable rather than repeating the literal so the two cannot drift apart.
x1_valid, x1_test, y1_valid, y1_test = train_test_split(x1_rem, y1_rem, test_size=test_size)

In [None]:
# Shapes of the training features and target.
# Two separate statements: joining the calls with a comma builds a tuple of
# the `print` return values, which a notebook then echoes as `(None, None)`.
print(x1_train.shape)
print(y1_train.shape)

In [None]:
# Shapes of the validation features and target.
# Two separate statements: joining the calls with a comma builds a tuple of
# the `print` return values, which a notebook then echoes as `(None, None)`.
print(x1_valid.shape)
print(y1_valid.shape)

In [None]:
# Shapes of the test features and target.
# Two separate statements: joining the calls with a comma builds a tuple of
# the `print` return values, which a notebook then echoes as `(None, None)`.
print(x1_test.shape)
print(y1_test.shape)

In [None]:
# Redo the three-way split with fast_ml's helper. This rebinds all six
# x1_*/y1_* names produced by the train_test_split cells above.
# NOTE(review): train_size + valid_size + test_size is 0.3 here, not 1.0 —
# confirm this is intended and how fast_ml treats it.
# NOTE(review): fast_ml is a third-party package imported here rather than
# with the other imports at the top of the file.
from fast_ml.model_development import train_valid_test_split
x1_train, y1_train, x1_valid, y1_valid, x1_test, y1_test=train_valid_test_split(df, target='Length',train_size=0.10,valid_size=0.1,test_size=0.1)

In [None]:
# Shapes of the training features and target.
# Two separate statements: joining the calls with a comma builds a tuple of
# the `print` return values, which a notebook then echoes as `(None, None)`.
print(x1_train.shape)
print(y1_train.shape)

In [None]:
# Shapes of the validation features and target.
# Two separate statements: joining the calls with a comma builds a tuple of
# the `print` return values, which a notebook then echoes as `(None, None)`.
print(x1_valid.shape)
print(y1_valid.shape)

In [None]:
# Shapes of the test features and target.
# Two separate statements: joining the calls with a comma builds a tuple of
# the `print` return values, which a notebook then echoes as `(None, None)`.
print(x1_test.shape)
print(y1_test.shape)

# Build the Model

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
from sklearn import datasets

In [None]:
# Load scikit-learn's bundled iris dataset and pull out its `data` and `target` arrays.
# NOTE(review): z, s and d are not referenced anywhere else in the visible
# file; the model below is fit on the abalone split, not on iris.
z=datasets.load_iris()
s=z.data
d=z.target

In [None]:
# Fit a linear regression on the training split.
lr=LinearRegression()
lr.fit(x1_train,y1_train)

# Train the Model & Test the Model

In [None]:
# Predictions of the fitted model on the training features.
y_lr_train_prt=lr.predict(x1_train)

In [None]:
# Predictions of the fitted model on the held-out test features.
y_lr_test_prt=lr.predict(x1_test)

# Measure the performance using Metrics

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Mean squared error and R^2 on the training and test splits.
lr_train_ms = mean_squared_error(y1_train, y_lr_train_prt)
lr_train_r2 = r2_score(y1_train, y_lr_train_prt)
lr_test_ms = mean_squared_error(y1_test, y_lr_test_prt)
# The test-set predictions are stored in `y_lr_test_prt`; there is no
# `y_lr_test_pred`, so using that name would raise NameError.
lr_test_r2 = r2_score(y1_test, y_lr_test_prt)

In [None]:
# Report the training-set mean squared error.
print(lr_train_ms)