In [None]:
import requests
import pandas as pd

access_token = ""
group_id = "fantasy_i_fantastica"
fields = "sex,bdate,education,relation,relatives"

def get_members():
    members = []
    offset = 0
    count = 1000

    while True:
        response = requests.get(
            "https://api.vk.com/method/groups.getMembers",
            params={
                "group_id": group_id,
                "fields": fields,
                "access_token": access_token,
                "v": "5.199",
                "offset": offset,
                "count": count
            }
        ).json()

        members.extend(response["response"]["items"])
        offset += count

        if offset >= response["response"]["count"]:
            break

    return members

def parse_user(user):
    gender = {1: "Ж", 2: "М"}.get(user.get("sex"), "?")

    bdate = user.get("bdate", "").split(".")
    year = bdate[2] if len(bdate) == 3 else None

    education = []
    if user.get("university_name"):
        education.append(user["university_name"])

    relation = {1: "не женат", 2: "есть друг", 3: "помолвлен",
                4: "женат", 5: "всё сложно", 6: "в активном поиске",
                7: "влюблен", 8: "в гражданском браке"}.get(user.get("relation"))

    children = []
    for relative in user.get("relatives", []):
        if relative.get("type") == "child":
            children.append(relative.get("name", "Нет имени"))

    return {
        "gender": gender if gender else "Нет данных",
        "birth_year": year if year else "Нет данных",
        "education": "Высшее" if education else "Нет данных",
        "relationship_status": relation if relation else "Нет данных",
        "children": "есть" if children else "Нет данных"
    }

members = get_members()
data = [parse_user(u) for u in members if not u.get("deactivated")]

df = pd.DataFrame(data)
df

Unnamed: 0,gender,birth_year,education,relationship_status,children
0,Ж,1989,Нет данных,Нет данных,Нет данных
1,Ж,1983,Нет данных,Нет данных,Нет данных
2,М,Нет данных,Нет данных,Нет данных,Нет данных
3,М,1989,Нет данных,Нет данных,Нет данных
4,М,Нет данных,Нет данных,Нет данных,Нет данных
...,...,...,...,...,...
29460,Ж,2004,Нет данных,Нет данных,Нет данных
29461,М,Нет данных,Нет данных,Нет данных,Нет данных
29462,М,1991,Нет данных,Нет данных,Нет данных
29463,М,2000,Нет данных,Нет данных,Нет данных


In [42]:
df_gender = df[df['gender'] != '?']
df_age = df[df['birth_year'] != 'Нет данных']
df_relationship = df[df['relationship_status'] != 'Нет данных']

In [54]:
df[df['gender'] == 'Ж'].shape
df[df['gender'] == 'М'].shape

(21693, 6)

In [None]:
from google.colab import files

df.to_csv("vk_final.csv", index=False)
files.download('vk_final.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import pandas as pd
df = pd.read_csv('/content/vk_final.csv')

In [58]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np

df_age['age'] = 2025 - pd.to_numeric(df_age['birth_year'])

bins = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90]
labels = ['0-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70-79', '80-89']
df_age['age_group'] = pd.cut(df_age['age'], bins=bins, labels=labels, right=False)


fig = make_subplots(
    rows=3, cols=2,
    specs=[
        [{"type": "pie"}, {"type": "bar"}],
        [{"type": "bar"}, {"type": "bar"}],
        [{"type": "pie"}, {"type": "pie"}]
    ],
    subplot_titles=(
        'Распределение по полу',
        'Женщины по возрасту',
        'Мужчины по возрасту',
        'Указавшие брак по возрасту',
        'Процент указавших брак от всех со статусом отношений',
        'Процент указавших детей'
    ),
    vertical_spacing=0.12,
    horizontal_spacing=0.1
)

gender_counts = df_gender['gender'].value_counts()
fig.add_trace(
    go.Pie(
        labels=gender_counts.index,
        values=gender_counts.values,
        marker_colors=['lightblue', 'coral'],
        textinfo='percent+label'
    ), row=1, col=1)

female_age = df[df['gender'] == 'Ж']['age_group'].value_counts(normalize=True).sort_index()*100
fig.add_trace(
    go.Bar(
        x=female_age.index,
        y=female_age.values,
        marker_color='coral',
        text=female_age.round(1).values,
        textposition='auto'
    ), row=1, col=2)

male_age = df[df['gender'] == 'М']['age_group'].value_counts(normalize=True).sort_index()*100
fig.add_trace(
    go.Bar(
        x=male_age.index,
        y=male_age.values,
        marker_color='lightblue',
        text=male_age.round(1).values,
        textposition='auto'
    ), row=2, col=1)

married_age = df[df['relationship_status'] == 'женат']['age_group'].value_counts(normalize=True).sort_index()*100
fig.add_trace(
    go.Bar(
        x=married_age.index,
        y=married_age.values,
        marker_color='pink',
        text=married_age.round(1).values,
        textposition='auto'
    ), row=2, col=2)

married_pie = df_relationship['relationship_status'].value_counts(normalize=True).get('женат', 0)*100
fig.add_trace(
    go.Pie(
        values=[married_pie, 100-married_pie],
        labels=['Женаты/замужем', 'Другие статусы'],
        marker_colors=['lightgreen', 'gray'],
        textinfo='percent+label'
    ), row=3, col=1)

columns = ['education', 'relationship_status', 'children']
mask = (df[columns] != 'Нет данных').any(axis=1)
df_extra_info = df[mask]

children_yes = df_extra_info[df_extra_info['children'] == 'есть'].shape[0]
children_pie = (children_yes / len(df)) * 100

fig.add_trace(
    go.Pie(
        values=[children_pie, 100 - children_pie],
        labels=['Есть дети', 'Не указано'],
        marker_colors=['lightgreen', 'gray'],
        rotation = 90,
        textinfo='percent+label'
    ), row=3, col=2)

fig.update_layout(
    title_text='Анализ пользователей сообщества в VK',
    height=1200,
    width=1200,
    showlegend=False,
    template='plotly_white',
    margin=dict(t=100, b=50)
)

fig.update_yaxes(title_text="Процент (%)", row=1, col=2)
fig.update_yaxes(title_text="Процент (%)", row=2, col=1)
fig.update_yaxes(title_text="Процент (%)", row=2, col=2)

fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

