In [231]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import SVR
import warnings
warnings.filterwarnings('ignore')

In [210]:
train = pd.read_csv('/kaggle/input/feedback-prize-english-language-learning/train.csv')
test = pd.read_csv('/kaggle/input/feedback-prize-english-language-learning/test.csv')

In [211]:
train.head()

Unnamed: 0,text_id,full_text,cohesion,syntax,vocabulary,phraseology,grammar,conventions
0,0016926B079C,I think that students would benefit from learn...,3.5,3.5,3.0,3.0,4.0,3.0
1,0022683E9EA5,When a problem is a change you have to let it ...,2.5,2.5,3.0,2.0,2.0,2.5
2,00299B378633,"Dear, Principal\n\nIf u change the school poli...",3.0,3.5,3.0,3.0,3.0,2.5
3,003885A45F42,The best time in life is when you become yours...,4.5,4.5,4.5,4.5,4.0,5.0
4,0049B1DF5CCC,Small act of kindness can impact in other peop...,2.5,3.0,3.0,3.0,2.5,2.5


In [212]:
test.head()

Unnamed: 0,text_id,full_text
0,0000C359D63E,when a person has no experience on a job their...
1,000BAD50D026,Do you think students would benefit from being...
2,00367BB2546B,"Thomas Jefferson once states that ""it is wonde..."


In [213]:
train.isna().sum()

text_id        0
full_text      0
cohesion       0
syntax         0
vocabulary     0
phraseology    0
grammar        0
conventions    0
dtype: int64

In [214]:
test.isna().sum()

text_id      0
full_text    0
dtype: int64

In [215]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3911 entries, 0 to 3910
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   text_id      3911 non-null   object 
 1   full_text    3911 non-null   object 
 2   cohesion     3911 non-null   float64
 3   syntax       3911 non-null   float64
 4   vocabulary   3911 non-null   float64
 5   phraseology  3911 non-null   float64
 6   grammar      3911 non-null   float64
 7   conventions  3911 non-null   float64
dtypes: float64(6), object(2)
memory usage: 244.6+ KB


# Splitting Data

In [216]:
X = train['full_text']
Y = train.drop(['full_text', 'text_id'], axis = 1)
x_train, x_test,y_train,y_test = train_test_split(X,Y, test_size = 0.30, random_state = 1)


# Extracting Features

In [217]:
cv = CountVectorizer()
features = cv.fit_transform(x_train)
x_test = cv.transform(x_test)

# Buillding Model

In [218]:
model = SVR()

# Wrapping Model into Multioutput Regression

In [219]:
wrapper = MultiOutputRegressor(model)

In [220]:
wrapper.fit(features, y_train)

MultiOutputRegressor(estimator=SVR())

# Checking model accuracy

In [221]:
print(f'Accuracy : {round(wrapper.score(features, y_train), 4)*100}%')

Accuracy : 67.01%


In [222]:
y_pred = wrapper.predict(x_test)

In [223]:
test_split = test['full_text']
test_split = cv.transform(test_split)

In [224]:
final_pred = wrapper.predict(test_split)

In [225]:
train

Unnamed: 0,text_id,full_text,cohesion,syntax,vocabulary,phraseology,grammar,conventions
0,0016926B079C,I think that students would benefit from learn...,3.5,3.5,3.0,3.0,4.0,3.0
1,0022683E9EA5,When a problem is a change you have to let it ...,2.5,2.5,3.0,2.0,2.0,2.5
2,00299B378633,"Dear, Principal\n\nIf u change the school poli...",3.0,3.5,3.0,3.0,3.0,2.5
3,003885A45F42,The best time in life is when you become yours...,4.5,4.5,4.5,4.5,4.0,5.0
4,0049B1DF5CCC,Small act of kindness can impact in other peop...,2.5,3.0,3.0,3.0,2.5,2.5
...,...,...,...,...,...,...,...,...
3906,FFD29828A873,I believe using cellphones in class for educat...,2.5,3.0,3.0,3.5,2.5,2.5
3907,FFD9A83B0849,"Working alone, students do not have to argue w...",4.0,4.0,4.0,4.0,3.5,3.0
3908,FFDC4011AC9C,"""A problem is a chance for you to do your best...",2.5,3.0,3.0,3.0,3.5,3.0
3909,FFE16D704B16,Many people disagree with Albert Schweitzer's ...,4.0,4.5,4.5,4.0,4.5,4.5


In [259]:
sub = pd.DataFrame(final_pred, columns = ['cohesion','syntax','vocabulary', 'phraseology', 'grammar' , 'conventions' ])
sub

Unnamed: 0,cohesion,syntax,vocabulary,phraseology,grammar,conventions
0,3.082238,2.999308,3.185372,3.05046,2.897714,2.868414
1,2.874783,2.744143,2.856657,2.480214,2.33621,2.764309
2,3.754493,3.586015,3.616965,3.657195,3.69791,3.5612


In [261]:
sub = pd.concat([sub, test['text_id']], axis = 1)
sub

Unnamed: 0,cohesion,syntax,vocabulary,phraseology,grammar,conventions,text_id
0,3.082238,2.999308,3.185372,3.05046,2.897714,2.868414,0000C359D63E
1,2.874783,2.744143,2.856657,2.480214,2.33621,2.764309,000BAD50D026
2,3.754493,3.586015,3.616965,3.657195,3.69791,3.5612,00367BB2546B


In [263]:
sub = sub.iloc[:, [6, 0,1,2,3,4,5]]
sub

Unnamed: 0,text_id,cohesion,syntax,vocabulary,phraseology,grammar,conventions
0,0000C359D63E,3.082238,2.999308,3.185372,3.05046,2.897714,2.868414
1,000BAD50D026,2.874783,2.744143,2.856657,2.480214,2.33621,2.764309
2,00367BB2546B,3.754493,3.586015,3.616965,3.657195,3.69791,3.5612


In [264]:
sub.to_csv('submission.csv', index = False)

Unnamed: 0,text_id,conventions,conventions.1
0,0000C359D63E,,2.868414
1,000BAD50D026,,2.764309
2,00367BB2546B,,3.5612
