# Yemeni Dataset: Public Perceptions of the United States in 2014

### 1. Set up working environment:

In [1]:
import numpy as np
import collections 
import scipy as sp
import pandas as pd
from scipy.stats import norm
from numpy.random import normal
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import statsmodels.formula.api as smf
import statsmodels.api as st
%matplotlib inline 

### 2. Load data and take a glance at its current state:

In [2]:
yemen_data = pd.read_csv("../../Desktop/Y.Work.csv")

In [3]:
yemen_data.head()

Unnamed: 0,Profile1,Profile2,Profile3,Profile4,Profile5,Profile6,Q1,Q2,Q3,Q4,...,Q32.Choice4,Q32.Choice5,Q32.Choice6,Q33,Q34.Choice1,Q34.Choice2,Q34.Choice3,Q34.Choice4,Q34.Choice5,Q34.Choice6
0,1,5,1,2,3,6,2,2,2,1,...,,,,3,1,2,6,5,4,3
1,1,3,1,3,1,6,2,2,2,1,...,3.0,4.0,6.0,1,1,3,6,4,2,5
2,2,5,1,2,3,6,2,2,1,1,...,2.0,3.0,6.0,2,1,2,6,3,5,4
3,2,4,1,2,2,6,2,2,2,1,...,3.0,2.0,1.0,3,3,1,6,5,2,4
4,2,5,1,2,1,6,1,2,1,2,...,,,1.0,1,1,2,5,3,4,6


In [4]:
yemen_data.shape

prob_cols = ['Q31.Choice1','Q31.Choice2','Q31.Choice3','Q31.Choice4','Q31.Choice5',
             'Q32.Choice1','Q32.Choice2','Q32.Choice3','Q32.Choice4','Q32.Choice5','Q32.Choice6']

# del yemen_data['Q32.Choice6']
for element in prob_cols:
    del yemen_data[str(element)]
    
# yemen_data.shape()

In [5]:
yemen_data.shape

(783, 64)

In [6]:
yemen_data.isnull().sum().all()

False

#### Identifies Columns Containing Less Than Expected Count: 

In [7]:
def total_missing_values(dataset):   
    """This function searches through all the columns for blank values or missing values."""
    for i in range(len(dataset.count())):
        if dataset.count()[i] != 783:
            print(dataset.columns[i])
        else:
            pass

total_missing_values(yemen_data)

#### Identifies Columns Where NaN values Present:

In [8]:
#This function looks for any missing values, and appends these missing values to a list.
yemen_data.columns[yemen_data.isnull().any()].tolist()  

[]

### Clean the data:

In [9]:
#This changes any periods to underscores 
yemen_data.columns = [c.lower().replace(".","_") for c in yemen_data.columns]

In [10]:
#This renames the columns with a readable profiling characteristic
yemen_data.rename(columns={'profile1': 'gender', 
                           'profile2': 'level_of_education', 
                           'profile3': 'urban_or_rural', 
                           'profile4': 'employment', 
                           'profile5': 'age', 
                           'profile6': 'region'}, 
                  inplace=True)

In [11]:
#Filling in all the NaN values with 0
yemen_data = yemen_data.fillna(value=0) 

In [12]:
# four_probs = (yemen_data.replace(to_replace = ["1","2"], value=[0, 1]).q4)

def isolation(dataset):
    for col in dataset:
        col_to_string = str(col)
        total_values = dataset[col_to_string].value_counts()
        total_values_to_frame = pd.Series.to_frame(total_values)
        isolate_rows = total_values_to_frame.index
        print (isolate_rows, col_to_string)
#         for i in isolate_rows:
#             if i == " ":
#                 print ("Blank Value",)
 
isolation(yemen_data)

Int64Index([1, 2], dtype='int64') gender
Int64Index([4, 5, 2, 1, 3], dtype='int64') level_of_education
Int64Index([1, 2, 4], dtype='int64') urban_or_rural
Int64Index([2, 1, 3], dtype='int64') employment
Int64Index([2, 1, 3, 4, 5], dtype='int64') age
Int64Index([1, 3, 6, 2, 4], dtype='int64') region
Int64Index([2, 1], dtype='int64') q1
Index(['2', '1', ' '], dtype='object') q2
Index(['2', '1', ' '], dtype='object') q3
Index(['1', '2', ' ', '3'], dtype='object') q4
Index(['2', '1', ' '], dtype='object') q5
Index(['2', '1', ' '], dtype='object') q6
Index([' ', '2', '1', '3'], dtype='object') q6_5
Index(['5', '4', '3', '2', ' ', '1'], dtype='object') q7_choice1
Index(['1', '2', '3', '5', '4', ' '], dtype='object') q7_choice2
Index(['1', '2', '3', ' ', '4', '5'], dtype='object') q7_choice3
Index(['4', '3', '5', '2', ' ', '1'], dtype='object') q7_choice4
Index(['5', '4', '3', '1', '2', ' '], dtype='object') q7_choice5
Index(['2', '4', '5', '3', '1', ' '], dtype='object') q8
Index(['1', '2', 

### Cleaned table with blanks:
### Define numeric values with strings:

In [13]:
#These are the cleaned up profiling characteristics:

yemen_data['gender'] = (yemen_data.replace(to_replace = [1,2], 
                             value=[1, 
                            2]).gender)

yemen_data['level_of_education'] = (yemen_data.replace(to_replace = [1,2,3,4,5], 
                                value=[1, 
                                       2, 
                                       3, 
                                       4, 
                                       5]).level_of_education)

yemen_data['employment'] = (yemen_data.replace(to_replace = [1,2,3], 
                                 value=[1, 
                                2, 
                                3]).employment)

yemen_data['urban_or_rural'] = (yemen_data.replace(to_replace = [1,2,4], 
                                  value=[1, 
                                         2, 
                                         0]).urban_or_rural)

yemen_data['age']  = (yemen_data.replace(to_replace = [1,2,3,4,5], 
                                 value=[1, 
                                        2, 
                                        3, 
                                        4, 
                                        5]).age)

yemen_data['region'] = (yemen_data.replace(to_replace = [1,2,3,4,5,6], 
                              value=[1, 
                                     2, 
                                     3, 
                                     4, 
                                     5, 
                                     6]).region)


In [14]:
# This replace the numeric values with strings for each question

yemen_data['q1'] = (yemen_data.replace(to_replace = [1,2], value=[1, 2]).q1)

yemen_data['q2'] = (yemen_data.replace(to_replace = ["1","2"," "], value=[1, 2, 0]).q2)

yemen_data['q3'] = (yemen_data.replace(to_replace = ["1","2", " "], value=[1, 2, 0]).q3)

yemen_data['q4'] = (yemen_data.replace(to_replace = ["1","2","3"," "], value=[1, 2, 0, 0]).q4)

yemen_data['q5'] = (yemen_data.replace(to_replace = ["1","2"," "], value=[1, 2, 0]).q5)

yemen_data['q6'] = (yemen_data.replace(to_replace = ["1","2"," "], value=[1, 2, 0]).q6)

yemen_data['q6_5'] = (yemen_data.replace(to_replace = ["1","2","3", " "], value=[1, 2, 0, 0]).q6_5)

yemen_data['q7_choice1'] = (yemen_data.replace(to_replace = ["1","2","3","4","5"," "], 
                                 value=[1, 
                                        2, 
                                        3, 
                                        4, 
                                        5,
                                        0]).q7_choice1)

yemen_data['q7_choice2'] = (yemen_data.replace(to_replace = ["1","2","3","4","5"," "], 
                                 value=[1, 
                                        2, 
                                        3, 
                                        4, 
                                        5,
                                        0]).q7_choice2)

yemen_data['q7_choice3'] = (yemen_data.replace(to_replace = ["1","2","3","4","5"," "], 
                                 value=[1, 
                                        2, 
                                        3, 
                                        4, 
                                        5,
                                        0]).q7_choice3)


yemen_data['q7_choice4'] = (yemen_data.replace(to_replace = ["1","2","3","4","5"," "], 
                                 value=[1, 
                                        2, 
                                        3, 
                                        4, 
                                        5,
                                        0]).q7_choice4)

yemen_data['q7_choice5'] = (yemen_data.replace(to_replace = ["1","2","3","4","5"," "], 
                                 value=[1, 
                                        2, 
                                        3, 
                                        4, 
                                        5,
                                        0]).q7_choice4)

yemen_data['q8'] = (yemen_data.replace(to_replace = ["1","2","3","4","5",' '], 
                         value=[1, 
                                2, 
                                3, 
                                4, 
                                5,
                                0]).q8)

yemen_data['q9'] = (yemen_data.replace(to_replace = ["1","2","3","4","5"," "], 
                         value=[1, 
                                2, 
                                3, 
                                4, 
                                5,
                                0]).q9)

                                

yemen_data['q10'] = (yemen_data.replace(to_replace = ["1","2","3","4","5", " "], 
                          value=[1, 
                                 2, 
                                 3, 
                                 4, 
                                 5,
                                 0]).q10)


yemen_data['q11'] = (yemen_data.replace(to_replace = ["1","2","3","4","5", " "], 
                          value=[1, 
                                 2, 
                                 3, 
                                 4, 
                                 5, 
                                 0]).q11)

yemen_data['q12'] = (yemen_data.replace(to_replace = ["1","2","3","4","5", " "], 
                          value=[1, 
                                 2, 
                                 3, 
                                 4, 
                                 5, 
                                 0]).q12)

yemen_data['q13'] = (yemen_data.replace(to_replace = ["1","2","3","4","5", " "], 
                              value=[1, 
                                     2, 
                                     3, 
                                     4, 
                                     5, 
                                     0]).q13)

yemen_data['q14'] = (yemen_data.replace(to_replace = ["1","2","3","4","5", " "], 
                          value=[1, 
                                 2, 
                                 3, 
                                 4, 
                                 5, 
                                 0]).q14)

yemen_data['q15'] = (yemen_data.replace(to_replace = ["1","2","3","4","5", " "], 
                          value=[1, 
                                 2, 
                                 3, 
                                 4, 
                                 5, 
                                 0]).q15)

yemen_data['q16'] = (yemen_data.replace(to_replace = ["1","2","3","4","5", " "], 
                         value=[1, 
                                2, 
                                3, 
                                4, 
                                5, 
                                0]).q16)

yemen_data['q17'] = (yemen_data.replace(to_replace = ["1","2","3","4","5", " "], 
                          value=[1, 
                                 2, 
                                 3, 
                                 4, 
                                 5, 
                                 0]).q17)

yemen_data['q18'] = (yemen_data.replace(to_replace = ["1","2","3","4","5", " "], 
                          value=[1, 
                                 2, 
                                 3, 
                                 4, 
                                 5, 
                                 0]).q18)

yemen_data['q19'] = (yemen_data.replace(to_replace = ["1","2","3","4","5", " "], 
                          value=[1, 
                                 2, 
                                 3, 
                                 4, 
                                 5, 
                                 0]).q19)

yemen_data['q20'] = (yemen_data.replace(to_replace = ["1","2","3","4","5","14", " "], 
                          value=[1, 
                                 2, 
                                 3, 
                                 4, 
                                 5, 
                                 0,
                                 0]).q20)

yemen_data['q21'] = (yemen_data.replace(to_replace = ["1","2","3","4","5", " "], 
                          value=[1, 
                                 2, 
                                 3, 
                                 4, 
                                 5, 
                                 0]).q21)

yemen_data['q22'] = (yemen_data.replace(to_replace = ["1","2","3","4","5", " "], 
                          value=[1, 
                                 2, 
                                 3, 
                                 4, 
                                 5, 
                                 0]).q22)

yemen_data['q23_choice1'] = (yemen_data.replace(to_replace = ["1","2","3","4","25"," "], 
                          value=[1, 
                                 2, 
                                 3, 
                                 0, 
                                 0, 
                                 0]).q23_choice1)

yemen_data['q23_choice2'] = (yemen_data.replace(to_replace = ["1","2","3","4","25"," "], 
                          value=[1, 
                                 2, 
                                 3, 
                                 0, 
                                 0, 
                                 0]).q23_choice2)


yemen_data['q23_choice3'] = (yemen_data.replace(to_replace = ["1","2","3","4","25"," "], 
                          value=[1, 
                                 2, 
                                 3, 
                                 0, 
                                 0, 
                                 0]).q23_choice3)

yemen_data['q24'] = (yemen_data.replace(to_replace = ["1","2","3","4","5"," "], 
                          value=[1, 
                                 2, 
                                 3, 
                                 4, 
                                 5, 
                                 0]).q24)

yemen_data['q25_choice1'] = (yemen_data.replace(to_replace = ["1","2","3","4","12"," "], 
                                  value=[1, 
                                         2, 
                                         3, 
                                         0, 
                                         0, 
                                         0]).q25_choice1)

yemen_data['q25_choice2'] = (yemen_data.replace(to_replace = ["1","2","3","4","12"," "], 
                                  value=[1, 
                                         2, 
                                         3, 
                                         0, 
                                         0, 
                                         0]).q25_choice2)

yemen_data['q25_choice3'] = (yemen_data.replace(to_replace = ["1","2","3","4","12"," "], 
                                  value=[1, 
                                         2, 
                                         3, 
                                         0, 
                                         0, 
                                         0]).q25_choice3)

yemen_data['q26'] = (yemen_data.replace(to_replace = ["1","2","3"," "], 
                          value=[1, 
                                 2, 
                                 3, 
                                 0]).q26)

yemen_data['q27_choice1'] = (yemen_data.replace(to_replace = ["1","2","3","5"," "], 
                                  value=[1, 
                                         2, 
                                         3,
                                         0, 
                                         0]).q27_choice1)

yemen_data['q27_choice2'] = (yemen_data.replace(to_replace = ["1","2","3","5"," "], 
                                  value=[1, 
                                         2, 
                                         3,
                                         0, 
                                         0]).q27_choice2)

yemen_data['q27_choice3'] = (yemen_data.replace(to_replace = ["1","2","3","5"," "], 
                                   value=[1, 
                                          2, 
                                          3,
                                          0, 
                                          0]).q27_choice2)

yemen_data['q28_choice1'] = (yemen_data.replace(to_replace = ["1","2","3","4","5","6"," "], 
                                  value=[1, 
                                         2, 
                                         3,
                                         4, 
                                         5, 
                                         6, 
                                         0]).q28_choice1)


yemen_data['q28_choice2'] = (yemen_data.replace(to_replace = ["1","2","3","4","5","6"," "], 
                                  value=[1, 
                                         2, 
                                         3,
                                         4, 
                                         5, 
                                         6, 
                                         0]).q28_choice2)

yemen_data['q28_choice3'] = (yemen_data.replace(to_replace = ["1","2","3","4","5","6"," "], 
                                  value=[1, 
                                         2, 
                                         3,
                                         4, 
                                         5, 
                                         6, 
                                         0]).q28_choice3)

yemen_data['q28_choice4'] = (yemen_data.replace(to_replace = ["1","2","3","4","5","6"," "], 
                                   value=[1, 
                                          2, 
                                          3,
                                          4, 
                                          5, 
                                          6, 
                                          0]).q28_choice4)

yemen_data['q28_choice5'] = (yemen_data.replace(to_replace = ["1","2","3","4","5","6"," "], 
                                   value=[1, 
                                          2, 
                                          3,
                                          4, 
                                          5, 
                                          6, 
                                          0]).q28_choice5)

yemen_data['q28_choice6'] = (yemen_data.replace(to_replace = ["1","2","3","4","5","6","8"," "], 
                                   value=[1, 
                                          2, 
                                          3,
                                          4, 
                                          5, 
                                          6, 
                                          0,
                                          0]).q28_choice6)

yemen_data['q29_choice1'] = (yemen_data.replace(to_replace = ["1","2","3","4","5","6"," "], 
                                  value=[1, 
                                         2, 
                                         3,
                                         4, 
                                         5, 
                                         6, 
                                         0]).q29_choice1)

yemen_data['q29_choice2'] = (yemen_data.replace(to_replace = ["1","2","3","4","5","6"," "], 
                                  value=[1, 
                                         2, 
                                         3,
                                         4, 
                                         5, 
                                         6, 
                                         0]).q29_choice2)

yemen_data['q29_choice3'] = (yemen_data.replace(to_replace = ["1","2","3","4","5","6"," ","25"], 
                                  value=[1, 
                                         2, 
                                         3,
                                         4, 
                                         5, 
                                         6, 
                                         0,
                                         0]).q29_choice3)

yemen_data['q29_choice4'] = (yemen_data.replace(to_replace = ["1","2","3","4","5","6"," "], 
                                  value=[1, 
                                         2, 
                                         3,
                                         4, 
                                         5, 
                                         6, 
                                         0]).q29_choice4)

yemen_data['q29_choice5'] = (yemen_data.replace(to_replace = ["1","2","3","4","5","6"," ", "43"], 
                                  value=[1, 
                                         2, 
                                         3,
                                         4, 
                                         5, 
                                         6, 
                                         0,
                                         0]).q29_choice5)

yemen_data['q29_choice6'] = (yemen_data.replace(to_replace = ["1","2","3","4","5","6"," "], 
                                  value=[1, 
                                         2, 
                                         3,
                                         4, 
                                         5, 
                                         6, 
                                         0]).q29_choice6)

yemen_data['q30'] = (yemen_data.replace(to_replace = ["1","2","3","5"," "], 
                          value=[1, 
                                 2, 
                                 3, 
                                 0, 
                                 0]).q30)


yemen_data['q33'] = (yemen_data.replace(to_replace = ["1","2","3","6"," "], 
                          value=[1, 
                                 2, 
                                 3,
                                 0, 
                                 0]).q33)

yemen_data['q34_choice1'] = (yemen_data.replace(to_replace = ["1","2","3","4","5","6"," "], 
                                  value=[1, 
                                         2, 
                                         3,
                                         4, 
                                         5, 
                                         6, 
                                         0]).q34_choice1)

yemen_data['q34_choice2'] = (yemen_data.replace(to_replace = ["1","2","3","4","5","6"," "], 
                                  value=[1, 
                                         2, 
                                         3,
                                         4, 
                                         5, 
                                         6, 
                                         0]).q34_choice2)

yemen_data['q34_choice3'] = (yemen_data.replace(to_replace = ["1","2","3","4","5","6","53", " "], 
                                  value=[1, 
                                         2, 
                                         3,
                                         4, 
                                         5, 
                                         6,
                                         0,
                                         0]).q34_choice3)

yemen_data['q34_choice4'] = (yemen_data.replace(to_replace = ["1","2","3","4","5","6"," "], 
                                  value=[1, 
                                         2, 
                                         3,
                                         4, 
                                         5, 
                                         6, 
                                         0]).q34_choice4)

yemen_data['q34_choice5'] = (yemen_data.replace(to_replace = ["1","2","3","4","5","6"," ", "13"], 
                                   value=[1, 
                                          2, 
                                          3,
                                          4, 
                                          5,  
                                          6, 
                                          0,
                                          0]).q34_choice5)


yemen_data['q34_choice6'] = (yemen_data.replace(to_replace = ["1","2","3","4","5","6"," "], 
                                  value=[1, 
                                         2, 
                                         3,
                                         4, 
                                         5,  
                                         6, 
                                         0]).q34_choice6)

In [15]:
yemen_data.q34_choice1.value_counts()

1    679
2     26
0     18
5     17
3     16
6     15
4     12
Name: q34_choice1, dtype: int64

In [16]:
question_1 = "Have you traveled outside of Yemen?"
question_2 = "Have you traveled to the United States?"
question_3 = "Do you know of any foreigners in Yemen or abroad?"
question_4 = "If you received the opportunity to travel to America, \n and there would not be any problem with your paperwork or visa, \n would you go?"
question_5 = "Do you know of an American foreign aid project in Yemen?"
question_6 = "Do you know of any Yemeni zawamil (poetry) that speaks about America?"
question_7 = "If you wanted to know about political developments or news headlines, \n where would you look first?"
question_8 = "Yemeni news agencies report accurately on Yemeni political events in the news."
question_9 = "There are a number of political developments in Yemen \n that are not understood or not known even today."
question_10 = "Yemeni people are a democratic people."
question_11 = "American politics in Yemen represents the desires of American people."
question_12 = "There are members of American government who oppose American foreign policy in Yemen."
question_13 = "The American government knew of the terrorist operation on 9/11, \n but allowed it to happen anyway."
question_14 = "Who established al-Qaeda?"
question_15 = "One of American's long term goals in Yemen is to occupy the country."
question_16 = "The majority of American citizens realize and understand the difference \n between an extremist and a Yemeni citizen."
question_17 = "American citizens see Yemeni citizens as a people \n who sympathize with extremist movements."
question_18 = "American interests in Yemen oppose the interests of Yemeni people."
question_19 = "What is your opinion of American citizens?"
question_20 = "What is your opinion of the American Government?"
question_21 = "In the news, the voice of Muslim extremists is louder \n than the voice of normal Muslims."
question_22 = "Yemeni people want political and economic stability \n even if it was from a democratic regime."
question_23 = "Among the following options, what is the primary thing \n that affects your opinion of America?"
question_24 = "Among the following countries, who is responsible \n for establishing the state of Israel as well as drawing the Israeli borders?"
question_25 = "Among the following options, what is the primary thing \n that affects your opinion of America?"
question_26 = "There are justifications for America's use of drones?"
question_27 = "Among the following options, what would you consider the worst?"
question_28 = "Please rank the following crises in Yemen \n from greatest importance to least importance?"
question_29 = "Please rank the following positive aspects of America \n from greatest importance to least importance?"
question_30 = "Do you think the American government encourages and helps \n Middle Eastern countries achieve the things listed above?"
question_31 = "Among the following options, what is the worst about America?"
question_32 = "Among the following options, please rank which is most important to least important?"
question_33 = "What is more important in your opinion, \n the country giving aid or the aid itself?"
question_34 = "What is third most important in your life?"

## Descriptive Statistics and Plots:

In [17]:
def basic_plot(answer, clust, question=None):
    """Plots a basic bar plot of any question or profiling characteristic in the dataset"""
    #Prints the question on the survey
#     print (question)
    
    #prints a descriptive statistic 
    print((answer.value_counts()/(len(clust))*100))
    
    percentage = ((answer.value_counts()/(len(clust))*100))
    percentage.plot(kind="bar", 
                    figsize=(20,10), 
                    stacked=False, 
                    fontsize=20, 
                    legend = True, 
                    colormap="viridis")
    
#     plt.title(question,fontsize = 30)
    plt.ylabel("Percentage (%) ", fontsize = 30)
    plt.xlabel("Responses", fontsize = 25)

### Basic Bar Plot:

In [18]:
from kmodes import kmodes

km = kmodes.KModes(n_clusters=4, init='Huang', n_init=5, verbose=1)

clusters = km.fit_predict(yemen_data)

print (clusters)
print (clusters.shape)

# Print the cluster centroids
print(km.cluster_centroids_)


Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 1, iteration: 1/100, moves: 285, cost: 27199.0
Run 1, iteration: 2/100, moves: 92, cost: 27146.0
Run 1, iteration: 3/100, moves: 42, cost: 27137.0
Run 1, iteration: 4/100, moves: 1, cost: 27137.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 2, iteration: 1/100, moves: 316, cost: 27183.0
Run 2, iteration: 2/100, moves: 125, cost: 27115.0
Run 2, iteration: 3/100, moves: 53, cost: 27078.0
Run 2, iteration: 4/100, moves: 16, cost: 27078.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 3, iteration: 1/100, moves: 269, cost: 27098.0
Run 3, iteration: 2/100, moves: 90, cost: 26994.0
Run 3, iteration: 3/100, moves: 72, cost: 26968.0
Run 3, iteration: 4/100, moves: 1, cost: 26968.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 4, iteration: 1/100, moves: 281, cost: 27304.0
Run 4, iteration: 2/100, mo

In [19]:
from kmodes import kmodes

km = kmodes.KModes(n_clusters=4, init='Huang', n_init=5, verbose=1)

clusters = km.fit_predict(yemen_data)

print (clusters)
clust_df =pd.DataFrame(clusters)
clust_array = np.asarray(clusters)

Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 1, iteration: 1/100, moves: 256, cost: 27217.0
Run 1, iteration: 2/100, moves: 97, cost: 27114.0
Run 1, iteration: 3/100, moves: 42, cost: 27098.0
Run 1, iteration: 4/100, moves: 13, cost: 27098.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 2, iteration: 1/100, moves: 322, cost: 27584.0
Run 2, iteration: 2/100, moves: 171, cost: 27085.0
Run 2, iteration: 3/100, moves: 135, cost: 26987.0
Run 2, iteration: 4/100, moves: 47, cost: 26974.0
Run 2, iteration: 5/100, moves: 12, cost: 26963.0
Run 2, iteration: 6/100, moves: 11, cost: 26963.0
Init: initializing centroids
Init: initializing clusters
Starting iterations...
Run 3, iteration: 1/100, moves: 339, cost: 27190.0
Run 3, iteration: 2/100, moves: 110, cost: 27132.0
Run 3, iteration: 3/100, moves: 53, cost: 27108.0
Run 3, iteration: 4/100, moves: 44, cost: 27092.0
Run 3, iteration: 5/100, moves: 5, cost: 27092.0
Init:

In [20]:
import numpy as np

index_0 = [i for i,v in enumerate(clust_array) if v == 0]
print (index_0)

index_1 = [i for i,v in enumerate(clust_array) if v == 1]

index_2 = [i for i,v in enumerate(clust_array) if v == 2]

index_3 = [i for i,v in enumerate(clust_array) if v == 3]

# index_4 = [i for i,v in enumerate(clust_array) if v == 4]

# index_5 = [i for i,v in enumerate(clust_array) if v == 5]

# label_0 = [(v) for i,v in enumerate(clust_array) if v == 0]
# print (label_0)

# label_0 = [(i,v) for i,v in enumerate(clust_array) if v == 0]
# print (label_0)

[0, 1, 2, 3, 10, 12, 20, 23, 24, 25, 30, 37, 38, 41, 45, 53, 58, 61, 62, 65, 67, 69, 70, 91, 93, 94, 95, 100, 109, 112, 113, 124, 129, 130, 136, 137, 145, 154, 164, 167, 168, 169, 170, 176, 180, 182, 184, 185, 189, 190, 191, 192, 194, 198, 202, 205, 215, 216, 217, 218, 221, 223, 224, 225, 231, 243, 244, 251, 261, 278, 288, 318, 325, 327, 329, 335, 336, 338, 340, 343, 356, 360, 361, 362, 363, 365, 366, 369, 371, 373, 374, 375, 377, 378, 379, 380, 385, 386, 387, 389, 390, 397, 399, 400, 401, 403, 407, 421, 423, 440, 441, 442, 447, 453, 454, 458, 460, 463, 475, 478, 479, 482, 485, 495, 496, 497, 511, 512, 514, 515, 518, 522, 523, 529, 535, 537, 538, 540, 542, 543, 545, 546, 547, 548, 550, 551, 552, 555, 556, 558, 559, 561, 562, 565, 568, 570, 572, 577, 579, 580, 581, 583, 587, 588, 591, 592, 593, 594, 597, 598, 599, 601, 603, 604, 605, 611, 614, 615, 616, 618, 623, 627, 628, 631, 634, 635, 636, 637, 639, 644, 645, 647, 648, 650, 654, 656, 657, 659, 660, 661, 662, 664, 666, 667, 676, 677, 

In [21]:
# You have a dataframe now for each of the clusters 
first_clust = pd.DataFrame(yemen_data, index=[index_0])
second_clust = pd.DataFrame(yemen_data, index=[index_1])
third_clust = pd.DataFrame(yemen_data, index=[index_2])
fourth_clust = pd.DataFrame(yemen_data, index=[index_3])

In [22]:
cluster_list = [first_clust, second_clust, third_clust, fourth_clust]

for cluster in cluster_list:
    
    cluster['gender'] = (cluster.replace(to_replace = [1,2], 
                                 value=["Male", 
                                "Female"]).gender)

    cluster['level_of_education'] = (cluster.replace(to_replace = [1,2,3,4,5], 
                                    value=["University", 
                                           "High School", 
                                           "Elementary", 
                                           "Read and Write", 
                                           "Cannot Read or Write"]).level_of_education)

    cluster['employment'] = (cluster.replace(to_replace = [1,2,3], 
                                     value=["Employed", 
                                    "Unemployed", 
                                    "Student"]).employment)

    cluster['urban_or_rural'] = (cluster.replace(to_replace = [1,2,0], 
                                      value=["Rural", 
                                             "Urban", 
                                             "Missing"]).urban_or_rural)

    cluster['age'] = (cluster.replace(to_replace = [1,2,3,4,5], 
                                     value=["15-19", 
                                            "20-29", 
                                            "30-29", 
                                            "40-60", 
                                            "x>60"]).age)

    cluster['region'] = (cluster.replace(to_replace = [1,2,3,4,5,6], 
                                  value=["Azal", 
                                         "Janid", 
                                         "Tihama", 
                                         "Hadramout", 
                                         "Taiz", 
                                         "Aden"]).region)

In [23]:
cluster_list = [first_clust, second_clust, third_clust, fourth_clust]

for cluster in cluster_list:
    
    cluster['q1'] = (cluster.replace(to_replace = [1,2], value=["Yes", "No"]).q1)

    cluster['q2'] = (cluster.replace(to_replace = [1,2,0], value=["Yes", "No", "Missing"]).q2)

    cluster['q3'] = (cluster.replace(to_replace = [1,2,0], value=["Yes", "No", "Missing"]).q3)

    cluster['q4'] = (cluster.replace(to_replace = [1,2,0], value=["Yes", "No", "Missing"]).q4)

    cluster['q5'] = (cluster.replace(to_replace = [1,2,0], value=["Yes", "No", "Missing"]).q5)

    cluster['q6'] = (cluster.replace(to_replace = [1,2,0], value=["Yes", "No", "Missing"]).q6)
    
    cluster['q6_5'] = (cluster.replace(to_replace = [1,2,0], value=["Yes", "No", "Missing"]).q6_5)

    cluster['q7_choice1'] = (cluster.replace(to_replace = [1,2,3,4,5,0], 
                                     value=["Radio", 
                                            "Internet", 
                                            "Television", 
                                            "Newspaper", 
                                            "Your Friends",
                                            "Missing"]).q7_choice1)

    cluster['q7_choice2'] = (cluster.replace(to_replace = [1,2,3,4,5,0], 
                                     value=["Radio", 
                                            "Internet", 
                                            "Television", 
                                            "Newspaper", 
                                            "Your Friends",
                                            "Missing"]).q7_choice2)

    cluster['q7_choice3'] = (cluster.replace(to_replace = [1,2,3,4,5,0], 
                                     value=["Radio", 
                                            "Internet", 
                                            "Television", 
                                            "Newspaper", 
                                            "Your Friends",
                                            "Missing"]).q7_choice3)


    cluster['q7_choice4'] = (cluster.replace(to_replace = [1,2,3,4,5,0], 
                                     value=["Radio", 
                                            "Internet", 
                                            "Television", 
                                            "Newspaper", 
                                            "Your Friends",
                                            "Missing"]).q7_choice4)
    
    cluster['q7_choice5'] = (cluster.replace(to_replace = [1,2,3,4,5,0], 
                                     value=["Radio", 
                                            "Internet", 
                                            "Television", 
                                            "Newspaper", 
                                            "Your Friends",
                                            "Missing"]).q7_choice5)
    
    cluster['q8'] = (cluster.replace(to_replace = [1,2,3,4,5,0], 
                             value=["Strongly Agree", 
                                    "Agree", 
                                    "Disagree", 
                                    "Strongly Disagree", 
                                    "No Opinion", 
                                    "Missing"]).q8)

    cluster['q9'] = (cluster.replace(to_replace = [1,2,3,4,5,0], 
                             value=["Strongly Agree", 
                                    "Agree", 
                                    "Disagree", 
                                    "Strongly Disagree", 
                                    "No Opinion", 
                                    "Missing"]).q9)


    cluster['q10'] = (cluster.replace(to_replace = [1,2,3,4,5,0], 
                              value=["Strongly Agree", 
                                     "Agree", 
                                     "Disagree", 
                                     "Strongly Disagree", 
                                     "No Opinion", 
                                     "Missing"]).q10)

    cluster['q11'] = (cluster.replace(to_replace = [1,2,3,4,5,0], 
                              value=["Strongly Agree", 
                                     "Agree", 
                                     "Disagree", 
                                     "Strongly Disagree", 
                                     "No Opinion", 
                                     "Missing"]).q11)

    cluster['q12'] = (cluster.replace(to_replace = [1,2,3,4,5,0], 
                              value=["Strongly Agree", 
                                     "Agree", 
                                     "Disagree", 
                                     "Strongly Disagree", 
                                     "No Opinion", 
                                     "Missing"]).q12)

    cluster['q13'] = (cluster.replace(to_replace = [1,2,3,4,5,0], 
                                  value=["Strongly Agree", 
                                         "Agree", 
                                         "Disagree", 
                                         "Strongly Disagree", 
                                         "No Opinion", 
                                         "Missing"]).q13)

    cluster['q14'] = (cluster.replace(to_replace = [1,2,3,4,5,0], 
                              value=["America", 
                                     "Saudi Arabia", 
                                     "Osama Bin Laden \n and his supporters", 
                                     "Mixture of \n Saudi Arabia and America", 
                                     "No Opinion", 
                                     "Missing"]).q14)

    cluster['q15'] = (cluster.replace(to_replace = [1,2,3,4,5,0], 
                              value=["Strongly Agree", 
                                     "Agree", 
                                     "Disagree", 
                                     "Strongly Disagree", 
                                     "No Opinion", 
                                     "Missing"]).q15)

    cluster['q16'] = (cluster.replace(to_replace = [1,2,3,4,5,0], 
                              value=["Strongly Agree", 
                                     "Agree", 
                                     "Disagree", 
                                     "Strongly Disagree", 
                                     "No Opinion", 
                                     "Missing"]).q16)

    cluster['q17'] = (cluster.replace(to_replace = [1,2,3,4,5,0], 
                              value=["Strongly Agree", 
                                     "Agree", 
                                     "Disagree", 
                                     "Strongly Disagree", 
                                     "No Opinion", 
                                     "Missing"]).q17)

    cluster['q18'] = (cluster.replace(to_replace = [1,2,3,4,5,0], 
                              value=["Strongly Agree", 
                                     "Agree", 
                                     "Disagree", 
                                     "Strongly Disagree", 
                                     "No Opinion", 
                                     "Missing"]).q18)

    cluster['q19'] = (cluster.replace(to_replace = [1,2,3,4,5,0], 
                              value=["Very Positive", 
                                     "Positive", 
                                     "Negative", 
                                     "Very Negative", 
                                     "No Opinion", 
                                     "Missing"]).q19)

    cluster['q20'] = (cluster.replace(to_replace = [1,2,3,4,5,0], 
                              value=["Very Positive", 
                                     "Positive", 
                                     "Negative", 
                                     "Very Negative", 
                                     "No Opinion",
                                     "Missing"]).q20)

    cluster['q21'] = (cluster.replace(to_replace = [1,2,3,4,5,0], 
                              value=["Strongly Agree", 
                                     "Agree", 
                                     "Disagree", 
                                     "Strongly Disagree", 
                                     "No Opinion", 
                                     "Missing"]).q21)

    cluster['q22'] = (cluster.replace(to_replace = [1,2,3,4,5,0], 
                              value=["Strongly Agree", 
                                     "Agree", 
                                     "Disagree", 
                                     "Strongly Disagree", 
                                     "No Opinion", 
                                     "Missing"]).q22)

    cluster['q23_choice1'] = (cluster.replace(to_replace = [1,2,3,0], 
                              value=["Drones", 
                                     "Iraq", 
                                     "Palestinian Israeli Crisis",  
                                     "Missing"]).q23_choice1)

    cluster['q23_choice2'] = (yemen_data.replace(to_replace = [1,2,3,0], 
                              value=["Drones", 
                                     "Iraq", 
                                     "Palestinian Israeli Crisis",
                                     "Missing"]).q23_choice2)


    cluster['q23_choice3'] = (cluster.replace(to_replace = [1,2,3,0], 
                              value=["Drones", 
                                     "Iraq", 
                                     "Palestinian Israeli Crisis",  
                                     "Missing"]).q23_choice3)

    cluster['q24'] = (cluster.replace(to_replace = [1,2,3,4,5,0], 
                              value=["Russia", 
                                     "America", 
                                     "Britain", 
                                     "Jordan", 
                                     "I don't know", 
                                     "Missing"]).q24)

    cluster['q25_choice1'] = (cluster.replace(to_replace = [1,2,3,0], 
                                      value=["Presence", 
                                             "Spies", 
                                             "Drones", 
                                             "Missing"]).q25_choice1)

    cluster['q25_choice2'] = (cluster.replace(to_replace = [1,2,3,0], 
                                      value=["Presence", 
                                             "Spies", 
                                             "Drones", 
                                             "Missing"]).q25_choice2)

    cluster['q25_choice3'] = (cluster.replace(to_replace = [1,2,3,0], 
                                      value=["Presence", 
                                             "Spies", 
                                             "Drones", 
                                             "Missing"]).q25_choice3)

    cluster['q26'] = (cluster.replace(to_replace = [1,2,3,0], 
                              value=["Yes", 
                                     "No", 
                                     "No Opinion", 
                                     "Missing"]).q26)

    cluster['q27_choice1'] = (cluster.replace(to_replace = [1,2,3,0], 
                                      value=["Drones", 
                                             "Taiz burnings", 
                                             "Hospital",
                                             "Missing"]).q27_choice1)

    cluster['q27_choice2'] = (cluster.replace(to_replace = [1,2,3,0], 
                                      value=["Drones", 
                                             "Taiz burnings", 
                                             "Hospital", 
                                             "Missing"]).q27_choice2)

    cluster['q27_choice3'] = (cluster.replace(to_replace = [1,2,3,0], 
                                      value=["Drones", 
                                             "Taiz burnings", 
                                             "Hospital",
                                             "Missing"]).q27_choice3)

    cluster['q28_choice1'] = (cluster.replace(to_replace = [1,2,3,4,5,6,0], 
                                      value=["Gas Crisis", 
                                             "Electricity Crisis", 
                                             "Terrorism",
                                             "Water Crisis", 
                                             "Food Crisis", 
                                             "Civil War", 
                                             "Missing"]).q28_choice1)


    cluster['q28_choice2'] = (cluster.replace(to_replace = [1,2,3,4,5,6,0], 
                                      value=["Gas Crisis", 
                                             "Electricity Crisis", 
                                             "Terrorism",
                                             "Water Crisis", 
                                             "Food Crisis", 
                                             "Civil War", 
                                             "Missing"]).q28_choice2)

    cluster['q28_choice3'] = (yemen_data.replace(to_replace = [1,2,3,4,5,6,0], 
                                      value=["Gas Crisis", 
                                             "Electricity Crisis", 
                                             "Terrorism",
                                             "Water Crisis", 
                                             "Food Crisis", 
                                             "Civil War", 
                                             "Missing"]).q28_choice3)

    cluster['q28_choice4'] = (cluster.replace(to_replace = [1,2,3,4,5,6,0], 
                                      value=["Gas Crisis", 
                                             "Electricity Crisis", 
                                             "Terrorism","Water Crisis", 
                                             "Food Crisis", 
                                             "Civil War", 
                                             "Missing"]).q28_choice4)

    cluster['q28_choice5'] = (cluster.replace(to_replace = ["1","2","3","4","5","6"," "], 
                                      value=["Gas Crisis", 
                                             "Electricity Crisis", 
                                             "Terrorism",
                                             "Water Crisis", 
                                             "Food Crisis", 
                                             "Civil War", 
                                             "Missing"]).q28_choice5)

    cluster['q28_choice6'] = (cluster.replace(to_replace = [1,2,3,4,5,6,0], 
                                      value=["Gas Crisis", 
                                             "Electricity Crisis", 
                                             "Terrorism",
                                             "Water Crisis", 
                                             "Food Crisis", 
                                             "Civil War", 
                                             "Missing"]).q28_choice6)

    cluster['q29_choice1'] = (cluster.replace(to_replace = [1,2,3,4,5,6,0], 
                                      value=["Freedom of Expression", 
                                             "Government Services", 
                                             "Economic Opportunities",
                                             "Legal Equality", 
                                             "Technology", 
                                             "Security & Protection", 
                                             "Missing"]).q29_choice1)

    cluster['q29_choice2'] = (cluster.replace(to_replace = [1,2,3,4,5,6,0], 
                                      value=["Freedom of Expression", 
                                             "Government Services", 
                                             "Economic Opportunities",
                                             "Legal Equality", 
                                             "Technology", 
                                             "Security & Protection", 
                                             "Missing"]).q29_choice2)

    cluster['q29_choice3'] = (cluster.replace(to_replace = [1,2,3,4,5,6,0], 
                                      value=["Freedom of Expression", 
                                             "Government Services", 
                                             "Economic Opportunities",
                                             "Legal Equality", 
                                             "Technology", 
                                             "Security & Protection", 
                                             "Missing"]).q29_choice3)

    cluster['q29_choice4'] = (cluster.replace(to_replace = [1,2,3,4,5,6,0], 
                                      value=["Freedom of Expression", 
                                             "Government Services", 
                                             "Economic Opportunities",
                                             "Legal Equality", 
                                             "Technology", 
                                             "Security & Protection", 
                                             "Missing"]).q29_choice4)

    cluster['q29_choice5'] = (cluster.replace(to_replace = [1,2,3,4,5,6,0], 
                                      value=["Freedom of Expression", 
                                             "Government Services", 
                                             "Economic Opportunities",
                                             "Legal Equality", 
                                             "Technology", 
                                             "Security & Protection", 
                                             "Missing"]).q29_choice5)

    cluster['q29_choice6'] = (cluster.replace(to_replace = [1,2,3,4,5,6,0], 
                                      value=["Freedom of Expression", 
                                             "Government Services", 
                                             "Economic Opportunities",
                                             "Legal Equality", 
                                             "Technology", 
                                             "Security & Protection", 
                                             "Missing"]).q29_choice6)

    cluster['q30'] = (cluster.replace(to_replace = [1,2,3,0],
                              value=["Yes", 
                                     "No", 
                                     "No Opinion", 
                                     "Missing"]).q30)


    cluster['q33'] = (cluster.replace(to_replace = [1,2,3,0], 
                              value=["The aid", 
                                     "The country", 
                                     "No opinion",
                                     "Missing"]).q33)

    cluster['q34_choice1'] = (cluster.replace(to_replace = [1,2,3,4,5,6,0], 
                                      value=["Religion", 
                                             "Family", 
                                             "Work or Employment",
                                             "Health", 
                                             "Education", 
                                             "Leisure", 
                                             "Missing"]).q34_choice1)

    cluster['q34_choice2'] = (yemen_data.replace(to_replace = [1,2,3,4,5,6,0], 
                                      value=["Religion", 
                                             "Family", 
                                             "Work or Employment",
                                             "Health", 
                                             "Education", 
                                             "Leisure", 
                                             "Missing"]).q34_choice2)

    cluster['q34_choice3'] = (cluster.replace(to_replace = [1,2,3,4,5,6,0], 
                                      value=["Religion", 
                                             "Family", 
                                             "Work or Employment",
                                             "Health", 
                                             "Education", 
                                             "Leisure",
                                             "Missing"]).q34_choice3)

    cluster['q34_choice4'] = (cluster.replace(to_replace = [1,2,3,4,5,6,0], 
                                      value=["Religion", 
                                             "Family", 
                                             "Work or Employment",
                                             "Health", 
                                             "Education", 
                                             "Leisure", 
                                             "Missing"]).q34_choice4)

    cluster['q34_choice5'] = (cluster.replace(to_replace = [1,2,3,4,5,6,0], 
                                      value=["Religion", 
                                             "Family", 
                                             "Work or Employment",
                                             "Health", 
                                             "Education", 
                                             "Leisure", 
                                             "Missing"]).q34_choice5)

    cluster['q34_choice6'] = (cluster.replace(to_replace = [1,2,3,4,5,6,0], 
                                      value=["Religion", 
                                             "Family", 
                                             "Work or Employment",
                                             "Health", 
                                             "Education", 
                                             "Leisure", 
                                             "Missing"]).q34_choice6)

In [24]:
def clust_profile(clust_name):
    count = 1
    for name in clust_name:
        print ("CLUSTER:", count)
        for col in name.columns:
#             print (col)
            if col == 'gender' or col == 'age' or col == 'level_of_education' or col == 'employment' or col == 'region' or col == 'ur':
                print (name[str(col)].value_counts() / (len(name))*100)
                print ("\n")
        count += 1
        print ("\n")

clust_profile(cluster_list)

CLUSTER: 1
Female    64.341085
Male      35.658915
Name: gender, dtype: float64


Cannot Read or Write    36.821705
Read and Write          25.968992
Elementary              14.341085
High School             12.403101
University              10.465116
Name: level_of_education, dtype: float64


Unemployed    56.589147
Employed      21.705426
Student       21.705426
Name: employment, dtype: float64


20-29    27.906977
15-19    20.930233
30-29    20.155039
40-60    17.054264
x>60     13.953488
Name: age, dtype: float64


Tihama       48.062016
Azal         27.131783
Aden         13.565891
Janid         8.139535
Hadramout     3.100775
Name: region, dtype: float64




CLUSTER: 2
Female    62.146893
Male      37.853107
Name: gender, dtype: float64


Cannot Read or Write    33.898305
High School             23.728814
Read and Write          20.338983
University              13.559322
Elementary               8.474576
Name: level_of_education, dtype: float64


Unemployed    49.152542
Student 

In [35]:
print ((second_clust.q25_choice1.value_counts() / len(second_clust))*100)

Drones      61.016949
Presence    17.514124
Spies       17.514124
Missing      3.954802
Name: q25_choice1, dtype: float64


In [42]:
def majorities(clust_name):
    count = 1
    total_agrees_among_clust = []
    total_disagree_among_clust = []
    for name in clust_name:
        print ("CLUSTER:", count)
        common_agrees_among_clust = []
        common_disagree_among_clust = []
        for col in name.columns:
            all_values=(name[str(col)].value_counts() / (len(name))*100)
    #         print (all_values)
            
            if 'Yes' in all_values:
                total_true = all_values['Yes'] 
                if total_true > 50.0:
                    print ("Column Name:", str(col), 'Total Yes', total_true)
            
            if 'No' in all_values:
                total_false = all_values['No'] 
                if total_false > 50.0:
                    print ("Column Name:", str(col), 'Total No', total_false)
    #                 print ("\n")

            if "Radio" in all_values:
                if all_values['Radio'] > 50.0:
                    print ("Column Name:", str(col), "Total Radio", all_values['Radio'])

            if "Internet" in all_values:
                if all_values['Internet'] > 50.0:
                    print ("Column Name:", str(col), "Total Internet", all_values['Internet'])

            if "Television" in all_values:
                if all_values['Television'] > 50.0:
                    print ("Column Name:", str(col), "Total Television", all_values['Television'])

            if "Newspaper" in all_values:
                if all_values['Newspaper'] > 50.0:
                    print ("Column Name:", str(col), "Total Newspaper", all_values['Newspaper'])

        
            if 'Positive' in all_values and 'Very Positive' in all_values:
                total_positive = all_values['Positive'] + all_values['Very Positive']
#                 print (total_positive)
                if total_positive > 50.0:
                    print ("Column Name:", str(col), 'Total Positive', total_positive)
#                     print ("\n")
            
            if 'Negative' in all_values and 'Very Negative' in all_values:
                total_negative = all_values['Negative'] + all_values['Very Negative']
                if total_negative > 50.0:
                    print ("Column Name:", str(col), 'Total Negative', total_negative)
#                     print ("\n")
            
            if 'Agree' in all_values and 'Strongly Agree' in all_values:
                total_agree = all_values['Agree'] + all_values['Strongly Agree']
    #             print ("Column Name:", str(col), 'Total Agree', total_agree)
                if total_agree > 50.0:
                    print ("Column Name:", str(col), 'Total Agree', total_agree)
                    common_agrees_among_clust.append(str(col))
#                     print ("\n")
                    
            if 'Disagree' in all_values and 'Strongly Disagree' in all_values:
                total_disagree = all_values['Disagree'] + all_values['Strongly Disagree']
    #             print ("Column Name:", str(col), 'Total Disagree', total_disagree)
                if total_disagree > 50.0:
                    print ("Column Name:", str(col), 'Total Disagree', total_disagree)
                    common_disagree_among_clust.append(str(col))
#                     print ("\n")
            
            if "Aid" in all_values:
                if all_values['Aid'] > 50.0:
                    print ("Column Name:", str(col), "Total Aid", all_values['Aid'])
                
            if "Country" in all_values:
                if all_values['Country'] > 50.0:
                    print ("Column Name:", str(col), "Total Country", all_values['Country'])

            if "Drones" in all_values:
                if all_values['Drones'] > 50.0:
                    print ("Column Name:", str(col), "Total Drones", all_values['Drones'])

            if "Iraq" in all_values:
                if all_values['Iraq'] > 50.0:
                    print ("Column Name:", str(col), "Total Iraq", all_values['Iraq'])

            if "Palestinian Israeli Crisis" in all_values: 
                if all_values['Palestinian Israeli Crisis'] > 50.0:
                    print ("Column Name:", str(col), "Total Palestinian Israeli Crisis", all_values['Palestinian Israeli Crisis'])

            if "Presence" in all_values:
                if all_values['Presence'] > 50.0:
                    print ("Column Name:", str(col), "Total Presence", all_values['Presence']) 

            if "Spies" in all_values:
                if all_values['Spies'] > 50.0:
                    print ("Column Name:", str(col), "Total Spies", all_values['Spies'])

            if "Drone Strike" in all_values:
                if all_values['Drone Strike'] > 50.0:
                    print ("Column Name:", str(col), "Total Spies", all_values['Drone Strike']) 

            if "Taiz burnings" in all_values:
                if all_values['Taiz burnings'] > 50.0:
                    print ("Column Name:", str(col), "Total Taiz burnings", all_values['Taiz burnings'])  

            if "Hospital" in all_values:
                if all_values['Hospital'] > 50.0:
                    print ("Column Name:", str(col), "Total Hospital", all_values['Hospital'])

            if "Gas Crisis" in all_values:
                if all_values['Gas Crisis'] > 50.0:
                    print ("Column Name:", str(col), "Total Gas Crisis", all_values['Gas Crisis'])

            if "Electricity Crisis" in all_values:
                if all_values['Electricity Crisis'] > 50.0:
                    print ("Column Name:", str(col), "Total Electricity Crisis", all_values['Electricity Crisis']) 

            if "Terrorism" in all_values:
                if all_values['Terrorism'] > 50.0:
                    print ("Column Name:", str(col), "Total Terrorism", all_values['Terrorism'])

            if "Water Crisis" in all_values:
                if all_values['Water Crisis'] > 50.0:
                    print ("Column Name:", str(col), "Total Water Crisis", all_values['Water Crisis'])

            if "Food Crisis" in all_values:
                if all_values['Food Crisis'] > 50.0:
                    print ("Column Name:", str(col), "Total Food Crisis", all_values['Food Crisis'])

            if "Civil War" in all_values:
                if all_values['Civil War'] > 50.0:
                    print ("Column Name:", str(col), "Total Civil War", all_values['Civil War'])

            if "Religion" in all_values:
                if all_values['Religion'] > 50.0:
                    print ("Column Name:", str(col), "Total Religion", all_values['Religion'])

            if "Family" in all_values:
                if all_values['Family'] > 50.0:
                    print ("Column Name:", str(col), "Total Family", all_values['Family'])

            if "Work or Employment" in all_values:
                if all_values['Work or Employment'] > 50.0:
                    print ("Column Name:", str(col), "Total Work or Employment", all_values['Work or Employment'])

            if "Health" in all_values:
                if all_values['Health'] > 50.0:
                    print ("Column Name:", str(col), "Total Health", all_values['Health'])

            if "Education" in all_values:
                if all_values['Education'] > 50.0:
                    print ("Column Name:", str(col), "Total Education", all_values['Education']) 

            if "Leisure" in all_values:
                if all_values['Leisure'] > 50.0:
                    print ("Column Name:", str(col), "Total Leisure", all_values['Leisure'])

    
            if col == 'q34_choice6':
                total_agrees_among_clust.append(common_agrees_among_clust)
                total_disagree_among_clust.append(common_disagree_among_clust)
                
                
        count += 1
        print ("\n")
    
    print ("AGREE INTERSECTIONS:", set(total_agrees_among_clust[0]).intersection(*total_agrees_among_clust))
    print ("DISAGREE INTERSECTIONS:", set(total_disagree_among_clust[0]).intersection(*total_disagree_among_clust))
    
            
    
majorities(cluster_list)

CLUSTER: 1
Column Name: q1 Total No 76.3565891473
Column Name: q2 Total No 96.511627907
Column Name: q3 Total No 67.8294573643
Column Name: q4 Total Yes 55.4263565891
Column Name: q5 Total No 76.3565891473
Column Name: q6 Total No 74.0310077519
Column Name: q9 Total Agree 65.8914728682
Column Name: q10 Total Agree 66.2790697674
Column Name: q15 Total Agree 58.1395348837
Column Name: q18 Total Agree 51.5503875969
Column Name: q23_choice2 Total Iraq 50.3875968992
Column Name: q23_choice3 Total Drones 53.8759689922
Column Name: q34_choice1 Total Religion 83.3333333333
Column Name: q34_choice2 Total Family 53.1007751938


CLUSTER: 2
Column Name: q1 Total No 83.0508474576
Column Name: q2 Total No 96.0451977401
Column Name: q3 Total No 63.8418079096
Column Name: q4 Total Yes 56.4971751412
Column Name: q5 Total No 76.2711864407
Column Name: q6 Total No 73.4463276836
Column Name: q8 Total Agree 52.5423728814
Column Name: q9 Total Agree 75.1412429379
Column Name: q10 Total Agree 71.7514124294
C