In [9]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.feature_extraction.text import CountVectorizer

# Toy training corpus: each essay in 'essay_text' is paired, by position,
# with its reference score in 'grade'.
train_data = dict(
    essay_text=[
        'The importance of exercise cannot be overstated. Regular exercise has numerous benefits for both the body and mind. For example, it can help reduce the risk of chronic diseases, such as heart disease and diabetes, and it can also improve mental health and cognitive function. Therefore, it is important for everyone to incorporate exercise into their daily routine.',
        'Social media has become an integral part of modern life. While it has many benefits, such as connecting people from around the world and sharing information, it also has drawbacks. For example, it can lead to addiction and social isolation, and it can also be a platform for spreading misinformation. Therefore, it is important to use social media in moderation and to be aware of its potential downsides.',
    ],
    grade=[5.0, 1.0],
)


# Words dropped during preprocessing. Built once at import time instead of on
# every call; a frozenset gives O(1) membership tests and cannot be mutated.
_STOP_WORDS = frozenset({
    'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until',
    'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between',
    'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to',
    'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again',
    'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how',
    'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some',
    'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too',
    'very', 's', 't', 'can', 'will', 'just', 'don', 'should', 'now',
})


def preprocess_text(text):
    """Normalise raw essay text into a space-separated string of content words.

    Steps, in order:
      1. lower-case the text;
      2. turn every character that is neither alphanumeric nor whitespace
         into a single space;
      3. split on whitespace and drop every word found in ``_STOP_WORDS``.

    Args:
        text: The raw essay as a ``str``.

    Returns:
        The remaining words joined by single spaces ('' when nothing is left).
    """
    lowered = text.lower()

    # Punctuation is replaced by a space rather than deleted: deleting it
    # fuses neighbouring words ("Hello,world" -> "helloworld") and turns
    # "don't" into "dont", which slips past the 'don' / 't' stop words.
    cleaned = ''.join(
        ch if ch.isalnum() or ch.isspace() else ' '
        for ch in lowered
    )

    return ' '.join(word for word in cleaned.split() if word not in _STOP_WORDS)

# Clean every training essay in place with the same routine that
# grade_essay applies to new essays.
train_data['essay_text'] = [preprocess_text(raw) for raw in train_data['essay_text']]


# Bag-of-words term counts; max_features=5000 caps the vocabulary size.
vectorizer = CountVectorizer(max_features=5000)
X_train = vectorizer.fit_transform(train_data['essay_text'])
y_train = train_data['grade']

# Linear regression from term counts to grades (fit() returns the estimator).
model = LinearRegression().fit(X_train, y_train)


def grade_essay(essay):
    """Predict a grade for one essay using the fitted module-level model.

    The essay is cleaned with ``preprocess_text``, converted to term counts
    by the already-fitted ``vectorizer``, and scored by ``model``.

    Args:
        essay: The raw essay text.

    Returns:
        The first (and only) element of the model's prediction array.
    """
    features = vectorizer.transform([preprocess_text(essay)])
    predictions = model.predict(features)
    return predictions[0]

# Excerpts from a longer essay about Covid-19, taken from a website. Only one `essay = ...` assignment is left uncommented at a time.
# essay = "Covid-19 pandemic had change the normal living situation of people and due to it occurrence, new life norms had being introduced and being practiced which purposely to control the transmission of the virus. Talking about employment aspect, Covid-19 had cause large, medium and small businesses and companies in Malaysia to face big challenges and issues to sustain their growth. Tourism, food supplies, travel, retail and also manufacturing are among the business sectors that highly affected as people are being advised to stay indoor which make their business become slow and some of them also need to halt due to many factors which mainly associated with financial. The employers and employees are both affected as they have connection and benefit each other in term of income and business continuity."
# essay = "As stated in Occupational Safety and Health Act 1994 (OSHA 1994) under Section 15 (1), every employer and self-employed person are responsible to ensure the safety, health and welfare of his employees at the workplace, so far as is practicable. Due this pandemic occurrence, one of the challenges is the assurance of employee safety where employers were required to assess the risk exposure in workplace to Covid-19 before preventive measures can be implemented. Exposure risk level might be different depend on type of occupation which can be categorized as low (job with infrequent physical contact like office workers), medium risk (occupation with frequent contact such as retail workers) and high risk (job which have contact with people who likely to have Covid-19 such as healthcare workers). The risk exposure assessed by employers with the help of Safety Health Officer (SHO) or Occupational Officer support can guide employers of what control measures must be implemented in order to guarantee the safety of working environment from health risk due to transmission of virus to fellow employees. Stated under Section 18 (a) in Occupational Safety and Health (Safety and Health Officer) Regulations 1997, SHO are responsible to advise the employer or any person in charge of a workplace regarding the control measures to be taken for the assurance of the safety and health aspect of employees."
# Active excerpt: this is the text passed to grade_essay below.
essay = "Additionally, employers are required to develop Covid-19 response plan, implement the plan which also involved of communication regarding workplace hazards, flexibilities and protections to workers and related information such as Covid-19 way of transmission together with its sign and symptoms. The occurrence of disease pandemic had led to the absence of employee which might be because of fear to being exposed in workplace or having at home commitment such as taking care of children since day care centers were being closed. This will impact the working productivity, income and also affecting the other employees. Therefore, the most common control measure that being taken to prevent the spreading of virus in the same time to enable workers to present in the workplace is through the wearing of Personal Protective Equipment (PPE). Employers should provide and train their employees regarding the PPE on how to use, dispose and disinfect it. Employers of sectors which do not required the use of PPE such as retail sector before disease occurrence should provide their employees with PPE include of gloves and 3-ply surgical face mask. Some employees include of doctors, nurses, soldiers, Environmental Health officers and other healthcare workers and others who associated in combating the Covid-19 transmission, they need to be provided with full set of PPE include of gloves, respirator, disposable face shield, goggles, gown and even shoes cover. The full set PPE are essential for them as they may have contact objects and surrounding surfaces that harbor the virus. The PPE given by employers should not be charged on expenditure employees as stated in OSHA 1994 under Section 26."



# Bare expression: the predicted grade is not assigned or printed here.
# NOTE(review): this looks like a notebook cell, where the value of the last
# expression is shown as the cell output; in a plain script it is discarded.
grade_essay(essay)



3.4878048780487805