In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import pickle

In [8]:
# Read the raw campaign export and keep only the rows in which every column
# listed below has a value.
REQUIRED_COLUMNS = [
    'Url', 'Category', 'Position', 'Title', 'Location',
    'Amount_Raised', 'Goal', 'Number_of_Donators',
    'Length_of_Fundraising', 'FB_Shares', 'GFM_hearts',
    'Text', 'Latitude', 'Longitude',
]
df = pd.read_csv('./data.csv').dropna(subset=REQUIRED_COLUMNS)
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1185 entries, 0 to 1241
Data columns (total 16 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Unnamed: 0             1185 non-null   int64  
 1   Url                    1185 non-null   object 
 2   Category               1185 non-null   object 
 3   Position               1185 non-null   float64
 4   Title                  1185 non-null   object 
 5   Location               1185 non-null   object 
 6   Amount_Raised          1185 non-null   float64
 7   Goal                   1185 non-null   object 
 8   Number_of_Donators     1185 non-null   object 
 9   Length_of_Fundraising  1185 non-null   object 
 10  FB_Shares              1185 non-null   object 
 11  GFM_hearts             1185 non-null   object 
 12  Text                   1185 non-null   object 
 13  Latitude               1185 non-null   float64
 14  Longitude              1185 non-null   float64
 15  labe

In [9]:
# Keep only the columns needed for labelling (Amount_Raised, Goal) and for the
# text features (Title, Text).
# The explicit .copy() makes `df` an independent frame: later cells add and
# overwrite columns on it, and doing that on a selection of another frame is
# what triggers pandas' SettingWithCopyWarning.
df = df[['Title', 'Text', 'Amount_Raised', 'Goal']].copy()
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1185 entries, 0 to 1241
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Title          1185 non-null   object 
 1   Text           1185 non-null   object 
 2   Amount_Raised  1185 non-null   float64
 3   Goal           1185 non-null   object 
dtypes: float64(1), object(3)
memory usage: 46.3+ KB


In [10]:
# Build the single text field used as model input: title, one space, body.
titles = df['Title']
bodies = df['Text']
df['combined_text'] = titles + " " + bodies
df

Unnamed: 0,Title,Text,Amount_Raised,Goal,combined_text
0,92 Yr old Man Brutally Attacked.,Rodolfo Rodriguez needs your help today! 92 Yr...,327345.0,15000,92 Yr old Man Brutally Attacked. Rodolfo Rodri...
1,Olivia Stoy:Transplant & Liv it up!,Thomas Stoy needs your help today! Olivia Stoy...,316261.0,1.0M,Olivia Stoy:Transplant & Liv it up! Thomas Sto...
2,AUTOLOGOUS T CELL TRANSPLANT,Philip Defonte needs your help today! AUTOLOGO...,241125.0,250000,AUTOLOGOUS T CELL TRANSPLANT Philip Defonte ne...
3,A chance of rebirth,Sriram Kanniah needs your help today! A chance...,237424.0,225000,A chance of rebirth Sriram Kanniah needs your ...
4,Claire Wineland Needs Our Help,Melissa Yeager needs your help today! Claire W...,236590.0,225000,Claire Wineland Needs Our Help Melissa Yeager ...
...,...,...,...,...,...
1237,Help Send Michelle to Israel!,Michelle Serlet needs your help today! Help Se...,10370.0,8000,Help Send Michelle to Israel! Michelle Serlet ...
1238,Support Alvinâs Family,Kalvin Ahmed needs your help today! Support Al...,10349.0,15000,Support Alvinâs Family Kalvin Ahmed needs yo...
1239,College & Medical expenses,Anjelica Vossler needs your help today! Colleg...,10330.0,50000,College & Medical expenses Anjelica Vossler ne...
1240,Rhonda's Wish,Anesi Maverick Tuufuli needs your help today! ...,10265.0,15000,Rhonda's Wish Anesi Maverick Tuufuli needs you...


In [12]:
def convert_to_numeric(val):
    """Convert an abbreviated amount such as ``'12,500'``, ``'2.5K'`` or ``'1.0M'`` to a float.

    Parameters
    ----------
    val : object
        The raw cell value. Strings are parsed; any other type (for example a
        float that is already numeric) is returned unchanged.

    Returns
    -------
    float or object
        The parsed amount for string input, otherwise ``val`` itself.

    Raises
    ------
    ValueError
        If a string cannot be parsed as a number once the separators and the
        optional magnitude suffix have been removed.
    """
    if not isinstance(val, str):
        return val

    # Normalise first: surrounding whitespace, thousands separators and a
    # dollar sign carry no numeric information.
    cleaned = val.strip().replace(',', '').replace('$', '')

    # Only the final character is treated as a magnitude suffix, and the check
    # is case-insensitive so '1.5k' and '1.5K' are parsed the same way.
    multipliers = {'K': 1e3, 'M': 1e6}
    suffix = cleaned[-1:].upper()
    if suffix in multipliers:
        return float(cleaned[:-1]) * multipliers[suffix]
    return float(cleaned)

# Normalise both monetary columns to plain floats using the same converter.
for money_column in ('Amount_Raised', 'Goal'):
    df[money_column] = df[money_column].apply(convert_to_numeric)

df.head()


Unnamed: 0,Title,Text,Amount_Raised,Goal,combined_text
0,92 Yr old Man Brutally Attacked.,Rodolfo Rodriguez needs your help today! 92 Yr...,327345.0,15000.0,92 Yr old Man Brutally Attacked. Rodolfo Rodri...
1,Olivia Stoy:Transplant & Liv it up!,Thomas Stoy needs your help today! Olivia Stoy...,316261.0,1000000.0,Olivia Stoy:Transplant & Liv it up! Thomas Sto...
2,AUTOLOGOUS T CELL TRANSPLANT,Philip Defonte needs your help today! AUTOLOGO...,241125.0,250000.0,AUTOLOGOUS T CELL TRANSPLANT Philip Defonte ne...
3,A chance of rebirth,Sriram Kanniah needs your help today! A chance...,237424.0,225000.0,A chance of rebirth Sriram Kanniah needs your ...
4,Claire Wineland Needs Our Help,Melissa Yeager needs your help today! Claire W...,236590.0,225000.0,Claire Wineland Needs Our Help Melissa Yeager ...


In [14]:
def is_genuine(row, partial_threshold=0.75):
    """Label a campaign from how much of its goal has been raised.

    The label is derived only from the ratio of ``Amount_Raised`` to ``Goal``;
    no other field of the row is consulted.

    Parameters
    ----------
    row : mapping
        Anything indexable by the keys ``'Amount_Raised'`` and ``'Goal'``
        (for example a DataFrame row), both holding numeric values.
    partial_threshold : float, default 0.75
        Fraction of the goal that must be reached for a campaign that has not
        met its goal to be labelled ``'Potentially Genuine'``.

    Returns
    -------
    str
        ``'Genuine'`` when the amount raised meets or exceeds the goal,
        ``'Potentially Genuine'`` when it reaches at least
        ``partial_threshold`` of the goal, otherwise ``'Not Genuine'``.
    """
    raised = row['Amount_Raised']
    goal = row['Goal']

    if raised >= goal:
        return 'Genuine'
    if raised >= partial_threshold * goal:
        return 'Potentially Genuine'
    # Anything below the threshold ends up here; so does a NaN amount or goal,
    # because every comparison against NaN is False.
    return 'Not Genuine'
# Label every campaign row by row and store the result as the target column.
campaign_status = df.apply(is_genuine, axis=1)
df['Campaign_Status'] = campaign_status
df.head()

Unnamed: 0,Title,Text,Amount_Raised,Goal,combined_text,Campaign_Status
0,92 Yr old Man Brutally Attacked.,Rodolfo Rodriguez needs your help today! 92 Yr...,327345.0,15000.0,92 Yr old Man Brutally Attacked. Rodolfo Rodri...,Genuine
1,Olivia Stoy:Transplant & Liv it up!,Thomas Stoy needs your help today! Olivia Stoy...,316261.0,1000000.0,Olivia Stoy:Transplant & Liv it up! Thomas Sto...,Not Genuine
2,AUTOLOGOUS T CELL TRANSPLANT,Philip Defonte needs your help today! AUTOLOGO...,241125.0,250000.0,AUTOLOGOUS T CELL TRANSPLANT Philip Defonte ne...,Potentially Genuine
3,A chance of rebirth,Sriram Kanniah needs your help today! A chance...,237424.0,225000.0,A chance of rebirth Sriram Kanniah needs your ...,Genuine
4,Claire Wineland Needs Our Help,Melissa Yeager needs your help today! Claire W...,236590.0,225000.0,Claire Wineland Needs Our Help Melissa Yeager ...,Genuine


In [15]:
# Features are the combined title and body text; the target is the derived label.
X, y = df['combined_text'], df['Campaign_Status']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [16]:
# Turn the raw text into TF-IDF features with a vocabulary of at most 5000 terms.
# The vectorizer is fitted on the training split only; the test split is
# transformed with that already-fitted vectorizer, so no test text influences
# the learned vocabulary or weights.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

In [17]:
# Linear-kernel SVM on the TF-IDF features.
# probability=True enables predict_proba on the fitted model; scikit-learn
# calibrates those probabilities with an internal, randomised
# cross-validation, so random_state is fixed to make the fitted model
# reproducible from run to run.
model = SVC(kernel='linear', probability=True, random_state=42)
model.fit(X_train_tfidf, y_train)

SVC(kernel='linear')

In [18]:
# Predict a campaign-status label for each document in the held-out test split.
y_pred = model.predict(X_test_tfidf)

In [19]:
# Overall accuracy, shown as a percentage with two decimals.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Per-class precision, recall, F1 and support.
report = classification_report(y_test, y_pred)
print(report)

Accuracy: 37.13%
                     precision    recall  f1-score   support

            Genuine       0.34      0.60      0.44        72
        Not Genuine       0.45      0.44      0.45        95
Potentially Genuine       0.16      0.04      0.07        70

           accuracy                           0.37       237
          macro avg       0.32      0.36      0.32       237
       weighted avg       0.33      0.37      0.33       237



In [20]:
# Persist the fitted classifier so it can be reloaded without retraining.
model_path = 'svm_campaign_classifier.pkl'
with open(model_path, 'wb') as model_file:
    pickle.dump(model, model_file)


In [21]:
# Persist the fitted vectorizer as well: new text must be transformed with the
# same fitted vectorizer the classifier was trained on.
vectorizer_path = 'tfidf_vectorizer.pkl'
with open(vectorizer_path, 'wb') as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

print("Model and vectorizer saved successfully.")

Model and vectorizer saved successfully.


In [27]:
import pandas as pd
import pickle

# Restore the classifier and the vectorizer written by the earlier cells.
# NOTE: pickle.load can execute arbitrary code, so only load files you trust.
with open('svm_campaign_classifier.pkl', 'rb') as saved_model:
    model = pickle.load(saved_model)

with open('tfidf_vectorizer.pkl', 'rb') as saved_vectorizer:
    vectorizer = pickle.load(saved_vectorizer)

# Placeholder campaign to classify; replace these with real values.
new_title = "Your campaign title here"
new_text = "Your campaign text here"

# Same layout as the training text: title, one space, body.
combined_text = f"{new_title} {new_text}"

# The vectorizer expects an iterable of documents, hence the one-element list.
input_data = vectorizer.transform([combined_text])

# Predict the label and report the single result.
prediction = model.predict(input_data)
print(f"Campaign Status: {prediction[0]}")


Campaign Status: Genuine
