In [14]:
import pandas as pd
from transformers import pipeline

In [15]:
file_path = "/content/Sugarcane_Supply_Chain_2021_2024.csv"

In [16]:
data = pd.read_csv(file_path)

In [17]:
sentiment_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased")
def analyze_risk(comment):
    comment_lower = str(comment).lower()
    if "drought" in comment_lower or "low rainfall" in comment_lower:
        return "Risk due to drought or insufficient rainfall."
    elif "flood" in comment_lower or "waterlogging" in comment_lower:
        return "Risk due to flooding or excess water."
    elif "pest" in comment_lower or "disease" in comment_lower:
        return "Risk from pests or crop diseases."
    elif "transport" in comment_lower or "logistics" in comment_lower:
        return "Supply chain disruption due to transportation issues."
    elif "strike" in comment_lower or "protest" in comment_lower:
        return "Risk from labor strikes or protests."
    else:
        return "No significant risk detected."

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cuda:0


In [21]:
def perform_analysis(data):
    results = []
    for _, row in data.iterrows():
        comment = row.get('Comment', '')
        sentiment = sentiment_pipeline(comment)[0] if comment else {"label": "neutral", "score": 0.5}
        risk = analyze_risk(comment)

        results.append({
            "Region": row.get('Region', 'Unknown'),
            "Month": row.get('Month', 'Unknown'),
            "Year": row.get('Year', 'Unknown'),
            "Comment": comment,
            "Sentiment": sentiment['label'],
            "Sentiment Score": sentiment['score'],
            "Risk Analysis": risk
        })

    return pd.DataFrame(results)

In [19]:
def main():
    analyzed_data = perform_analysis(data)
    output_file = "analyzed_sugarcane_supply_chain.csv"
    analyzed_data.to_csv(output_file, index=False)
    print(f"Analysis complete. Results saved to {output_file}")
    print("\nSample Output:\n", analyzed_data.head())

if __name__ == "__main__":
    main()

Analysis complete. Results saved to analyzed_sugarcane_supply_chain.csv

Sample Output:
     Region    Month     Year Comment Sentiment  Sentiment Score  \
0  Unknown  Unknown  Unknown           neutral              0.5   
1  Unknown  Unknown  Unknown           neutral              0.5   
2  Unknown  Unknown  Unknown           neutral              0.5   
3  Unknown  Unknown  Unknown           neutral              0.5   
4  Unknown  Unknown  Unknown           neutral              0.5   

                   Risk Analysis  
0  No significant risk detected.  
1  No significant risk detected.  
2  No significant risk detected.  
3  No significant risk detected.  
4  No significant risk detected.  
