**Installing Required Package**

In [54]:
pip install pandas numpy matplotlib seaborn scikit-learn nltk tensorflow




In [55]:
import pandas as pd
import numpy as np
import nltk
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

In [57]:
df = pd.read_csv("/content/essays.tsv", encoding='ISO-8859-1', sep='\t')


In [59]:
df.head()

Unnamed: 0,essay_id,essay_set,essay,domain1_predictionid,domain2_predictionid
0,2383,1,I believe that computers have a positive effec...,2383,
1,2384,1,"Dear @CAPS1, I know some problems have came up...",2384,
2,2385,1,"Dear to whom it @MONTH1 concern, Computers are...",2385,
3,2386,1,"Dear @CAPS1 @CAPS2, @CAPS3 has come to my atte...",2386,
4,2387,1,"Dear Local newspaper, I think that people have...",2387,


**Preprocess Essay**

In [60]:
def preprocess_text(text):

    # Convert to lowercase and remove special characters
    text = text.lower()
    text = nltk.re.sub(r'[^\w\s]', '', text)
    return text

In [61]:
df['cleaned_essay'] = df['essay'].apply(preprocess_text)

In [62]:
print(df[['essay', 'cleaned_essay']].head())

                                               essay  \
0  I believe that computers have a positive effec...   
1  Dear @CAPS1, I know some problems have came up...   
2  Dear to whom it @MONTH1 concern, Computers are...   
3  Dear @CAPS1 @CAPS2, @CAPS3 has come to my atte...   
4  Dear Local newspaper, I think that people have...   

                                       cleaned_essay  
0  i believe that computers have a positive effec...  
1  dear caps1 i know some problems have came up w...  
2  dear to whom it month1 concern computers arent...  
3  dear caps1 caps2 caps3 has come to my attentio...  
4  dear local newspaper i think that people have ...  



**Vectorization using TF-IDF**

In [63]:
tfidf = TfidfVectorizer(max_features=5000)
X = tfidf.fit_transform(df['cleaned_essay'])
print(tfidf.get_feature_names_out()[:10])

['1046' '12' '1250' '1930s' '1937' '1956' '1961' '55' '85th' 'abandon']


In [64]:
X_train, X_test, y_train, y_test = train_test_split(X, df['domain1_predictionid'], test_size=0.2, random_state=42)

**Model Training**


In [65]:
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [66]:
y_pred = model.predict(X_test)

In [67]:
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 1867645.0158640426


**Function for scoring essays**

In [76]:
def custom_score(essay_text):
    score = 0

    # Check essay length (words)
    word_count = len(essay_text.split())
    if word_count < 200:
        score += 1
    elif 200 <= word_count <= 500:
        score += 3
    else:
        score += 5

    # Check sentence structure (sentences)
    sentence_count = essay_text.count('.')
    if sentence_count < 5:
        score += 1
    elif 5 <= sentence_count <= 10:
        score += 3
    else:
        score += 5

    # Check vocabulary richness (unique words)
    vocabulary = len(set(essay_text.split()))
    if vocabulary < 50:
        score += 1
    elif 50 <= vocabulary <= 100:
        score += 3
    else:
        score += 5

    # Check paragraph structure (using newlines to separate paragraphs)
    paragraphs = essay_text.split('\n')
    if len(paragraphs) < 3:
        score += 1
    elif 3 <= len(paragraphs) <= 5:
        score += 3
    else:
        score += 5

    # Check for repetitiveness (overuse of words or phrases)
    repetitive_words = ['i think', 'and', 'the', 'in my opinion']
    repetition_count = sum(essay_text.lower().count(word) for word in repetitive_words)
    if repetition_count > 5:
        score -= 2
    elif repetition_count > 2:
        score -= 1

    return score

**Function to generate feedback based on the essay text**

In [77]:
def generate_detailed_feedback(essay_text):
    feedback = ""

    # Check essay length
    word_count = len(essay_text.split())
    if word_count < 200:
        feedback += "\n The essay seems too short. Consider elaborating on your points to meet the required length. "
    elif word_count > 1000:
        feedback += "\n The essay seems quite long. Try to keep it concise while still covering key points. "

    # Check sentence structure
    sentence_count = essay_text.count('.')
    if sentence_count < 5:
        feedback += "\n The essay could benefit from more complete sentences to increase clarity. "

    # Check paragraph structure
    paragraphs = essay_text.split('\n')
    if len(paragraphs) < 3:
        feedback += "\n Try dividing your essay into more paragraphs to improve readability. "

    # Check use of vocabulary
    vocabulary = len(set(essay_text.split()))
    if vocabulary < 50:
        feedback += "\n Consider using a broader vocabulary to make your essay more engaging and sophisticated. "

    # Check for clarity and flow (simple check for repetitive phrases)
    if essay_text.lower().count('and') > 10:
        feedback += "\n The essay repeats 'and' too often. Try varying your sentence structure to improve flow. "

    # Check grammar
    if essay_text.lower().count('i think') > 3:
        feedback += "\n Avoid using 'I think' repetitively. This can weaken your argument. Try to be more assertive in your statements. "

    return feedback

**Function to predict score and generate feedback**

In [78]:
def get_essay_feedback():
    print("Please enter your essay:")
    essay_text = input()

    cleaned_essay = preprocess_text(essay_text)

    score = custom_score(cleaned_essay)

    feedback = generate_detailed_feedback(essay_text)

    print(f"\nCustom Predicted Score: {score}")
    print("Detailed Feedback:")
    print(feedback)

get_essay_feedback()

Please enter your essay:
The environment is the foundation of life, providing essential resources like air, water, and soil. It sustains biodiversity, ensuring the balance of ecosystems. However, human activities such as deforestation, pollution, and overconsumption have led to environmental degradation, threatening the planet's health. Climate change, rising temperatures, and loss of wildlife are alarming consequences. Protecting the environment is crucial for future generations. Simple actions like planting trees, reducing waste, conserving water, and using renewable energy can make a significant difference. By fostering awareness and adopting sustainable practices, we can restore harmony with nature and ensure a healthier, greener planet for all.

Custom Predicted Score: 4
Detailed Feedback:

 The essay seems too short. Consider elaborating on your points to meet the required length. 
 Try dividing your essay into more paragraphs to improve readability. 
