# 1. Import Libraries

In [None]:
import os
import pandas
import numpy as np
import torch
import statsmodels.formula.api as sm

# 2. Load Model

In [None]:
# Load the 'yolov5s' entry point from the 'ultralytics/yolov5' repository
# through torch.hub.
model = torch.hub.load(repo_or_dir='ultralytics/yolov5', model='yolov5s')
# Limit detection to class id 0 (person detection).
model.classes = [0]

Using cache found in /root/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-3-14 Python-3.10.12 torch-2.2.1+cu121 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


# 3. Link to Google Drive

In [None]:
# Mount Google Drive at /content/drive so the "/content/drive/My Drive/..."
# paths used in this notebook resolve. The google.colab package is provided by
# the Colab runtime, so this cell is expected to run there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# 4. Paths

In [None]:
# NOTE(review): this was originally labelled "path to image folder", but the
# value points at a CSV file, and `path` is not referenced by any other cell
# visible in this notebook. The cells that follow read `path_images` and
# `path_csv_out` instead, and neither name is assigned in the visible cells --
# confirm the intended folders and define them here so the notebook survives
# Restart & Run All.
path = "/content/drive/My Drive/with_img.csv"



```
# This is formatted as code
```

# 5. Example

In [None]:
# Run the detector on a single image as a quick check before the batch loop.
# os.path.join adds the separator only when it is missing, so this works
# whether or not path_images ends with a slash.
img = os.path.join(path_images, "id_5731.jpg")
results = model(img)
results.show()  # display the detection result for this image

# 6. Multi-image Analysis

In [None]:
# Empty table that accumulates one row per detected person.
# The bounding-box / score / label columns match what the detection loop reads
# from the model output; 'filename' and 'person' are added per image there.
detection_columns = ["xmin", "ymin", "xmax", "ymax", "confidence", "class", "name"]
bookkeeping_columns = ["filename", "person"]
df_person = pandas.DataFrame(columns=detection_columns + bookkeeping_columns)

In [None]:
# Run person detection on every file in the image folder and collect one row
# per detection, then write the combined table to person_detect.csv.
filelist = os.listdir(path_images)

detections = []  # one DataFrame per image that produced at least one detection
for file in filelist:
  name = file.split(sep='.')[0]  # text before the first dot, e.g. "id_5731"
  img = os.path.join(path_images, file)
  results = model(img)
  x = results.pandas().xyxy[0]  # detections for this single image
  if x.empty:
    continue
  # Tag every detection with its source image and a 0-based index within it.
  x = x.assign(filename=name, person=np.arange(len(x)))
  detections.append(x)

# Concatenate once instead of growing df_person inside the loop, which copies
# the whole frame on every iteration. Starting from df_person.iloc[0:0] keeps
# the column schema but drops rows left over from a previous run of this cell,
# so re-running the cell does not append duplicate detections.
df_person = pandas.concat([df_person.iloc[0:0], *detections], axis=0)

df_person.to_csv(os.path.join(path_csv_out, "person_detect.csv"), sep=",")
df_person

Unnamed: 0,xmin,ymin,xmax,ymax,confidence,class,name,filename,person
0,19.912136,0.000000,710.666138,404.700836,0.275437,0,person,id_29889,0
0,482.066040,345.907074,494.410400,377.146637,0.689953,0,person,id_30031,0
1,496.781647,344.989349,509.525787,377.129578,0.642964,0,person,id_30031,1
0,132.783279,208.479446,334.554535,402.208923,0.332251,0,person,id_30054,0
0,348.757751,0.854856,711.519287,396.441650,0.318237,0,person,id_30103,0
...,...,...,...,...,...,...,...,...,...
1,162.767044,106.832581,341.968262,402.034302,0.887727,0,person,id_34248,1
2,316.314606,125.844681,546.481567,400.152588,0.874827,0,person,id_34248,2
3,0.000000,151.135345,205.751465,402.344055,0.826705,0,person,id_34248,3
0,18.636761,0.791101,662.297729,404.769012,0.697190,0,person,id_34250,0


# 7. Merge Data

In [None]:
# filename list: one row per entry of filelist, holding the name without its
# extension ('filename') and the token after the first underscore ('uid').
# Records are collected first and the frame is built once; assigning rows one
# at a time through .loc enlarges the frame on every iteration.
records = []
for file in filelist:
  name = file.split(sep='.')[0]  # "id_5731.jpg" -> "id_5731"
  uid = name.split(sep='_')[1]   # "id_5731" -> "5731" (kept as a string)
  records.append({'uid': uid, 'filename': name})
df_name = pandas.DataFrame(records, columns=['uid', 'filename'])
df_name

Unnamed: 0,uid,filename
0,3,id_3
1,37,id_37
2,29766,id_29766
3,29785,id_29785
4,29845,id_29845
...,...,...
232,34248,id_34248
233,34250,id_34250
234,63,id_63
235,59,id_59


In [None]:
# construct is_person indicator
# Collapse the detections to a single row per image (drop_duplicates keeps the
# first occurrence), then left-join onto the full file list so that images
# without any detection are retained with missing detection columns.
df_person = df_person.drop_duplicates(subset=['filename'])
df_is_person = pandas.merge(df_name, df_person, how='left', on='filename', indicator=True)

# '_merge' is "both" exactly when the image also appears in df_person, i.e. a
# detection row exists for it; encode that as 1 and everything else as 0.
df_is_person['is_person'] = (df_is_person['_merge'] == "both").astype('int64')

df_is_person

Unnamed: 0,uid,filename,xmin,ymin,xmax,...,class,name,person,_merge,is_person
0,3,id_3,,,,...,,,,left_only,0
1,37,id_37,,,,...,,,,left_only,0
2,29766,id_29766,,,,...,,,,left_only,0
3,29785,id_29785,,,,...,,,,left_only,0
4,29845,id_29845,,,,...,,,,left_only,0
...,...,...,...,...,...,...,...,...,...,...,...
232,34248,id_34248,538.966187,71.722458,718.994080,...,0,person,0,both,1
233,34250,id_34250,18.636761,0.791101,662.297729,...,0,person,0,both,1
234,63,id_63,,,,...,,,,left_only,0
235,59,id_59,,,,...,,,,left_only,0


In [None]:
# Attach the campaign-level data to the per-image person indicator and save
# the merged table as full.csv.
df_camp = pandas.read_csv("/content/drive/My Drive/LC Data Science/crowdfunding_data.csv")
# uid in df_is_person was parsed out of file names and is therefore text; cast
# the campaign uid to text as well so the join keys compare equal.
df_camp['uid'] = df_camp['uid'].astype(str)
df_analysis = df_is_person.merge(df_camp, how='left', on='uid')
df_analysis.to_csv(os.path.join(path_csv_out, "full.csv"), sep=",")
# A bare expression is only rendered when it is the last statement of a cell,
# so the preview goes after the write.
df_analysis.head()

In [None]:
# Column names of the merged analysis table as a plain Python list.
variables = list(df_analysis.columns)
variables

['uid',
 'filename_x',
 'xmin',
 'ymin',
 'xmax',
 'ymax',
 'confidence',
 'class',
 'name',
 'person',
 '_merge',
 'is_person',
 'Unnamed: 0',
 'filename_y',
 'language',
 'URL',
 'currency',
 'conversion',
 'Raised',
 'raised_USD',
 'Goal',
 'goal_USD',
 'Title',
 'Created',
 'pull_date',
 'date_difference',
 'Category',
 'Description',
 'Donations',
 'ln_donations',
 'unique_donors',
 'ln_unique_donors',
 'Followers',
 'ln_followers',
 'Shares',
 'ln_shares',
 'WC',
 'Analytic',
 'Clout',
 'Authentic',
 'Tone',
 'WPS',
 'Sixltr',
 'Dic',
 'function.',
 'pronoun',
 'ppron',
 'i',
 'we',
 'you',
 'shehe',
 'they',
 'ipron',
 'article',
 'prep',
 'auxverb',
 'adverb',
 'conj',
 'negate',
 'verb',
 'adj',
 'compare',
 'interrog',
 'number',
 'quant',
 'affect',
 'posemo',
 'negemo',
 'anx',
 'anger',
 'sad',
 'social',
 'family',
 'friend',
 'female',
 'male',
 'cogproc',
 'insight',
 'cause',
 'discrep',
 'tentat',
 'certain',
 'differ',
 'percept',
 'see',
 'hear',
 'feel',
 'bio',
 '

# 8. Data Analysis

In [None]:
# Fit an OLS regression of log(1 + raised_USD) on is_person, posemo, negemo
# and WC, and print the summary table.
# NOTE(review): the transform overwrites 'raised_USD' in place, so running
# this cell twice applies log1p twice -- re-run the merge cell first if this
# cell has to be executed again.
df_analysis['raised_USD'] = np.log1p(df_analysis['raised_USD'])
ols_formula = "raised_USD ~ is_person + posemo + negemo + WC "
result = sm.ols(formula=ols_formula, data=df_analysis).fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:             raised_USD   R-squared:                       0.050
Model:                            OLS   Adj. R-squared:                  0.033
Method:                 Least Squares   F-statistic:                     3.021
Date:                Thu, 14 Mar 2024   Prob (F-statistic):             0.0187
Time:                        23:45:53   Log-Likelihood:                -456.73
No. Observations:                 237   AIC:                             923.5
Df Residuals:                     232   BIC:                             940.8
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      6.0058      0.340     17.660      0.0