# Firebase sector_score 컬렉션 수정 파일

기존 sector_score => datas 문서에 당일 n.n.p(negative / neutral / positive) 개수만 작성되어 있었음

수정 후 sector_score => 일자별로 문서를 작성하고, 각 문서에 해당 일자의 n.n.p와 이전 일자의 n.n.p를 함께 작성

### 과정
sector_detail/"섹터명"/dates/"일자" 에 있는 데이터를 기반으로 추가

데이터 형식
sector_score/"일자"/
```
{
    "IT": 
        [{ "negative" : 1, "neutral" : 23, "positive" : 6 }, 
        { "negative" : 1, "neutral" : 23, "positive" : 6 }],
    ....
}
```
리스트의 앞에 오는 게 해당 일자의 데이터, 뒤에 오는 게 이전 일자의 데이터. (단, 아래 main_algoritm 구현은 두 번째 요소에 이전 일자의 n.n.p 대신 해당 일자의 가중 평균 score를 기록한다.)

발생할 수 있는 문제점
- 모든 일자가 다 존재하는 게 아님.  -> 없다면 null 처리
- 섹터별로 존재하는 일자가 다를 수 있음 -> 없다면 null 처리


In [None]:
## 시스템 import 위치 지정
import sys
from pathlib import Path

# Add the `back` directory to sys.path.
# When `__file__` is defined, use the grandparent directory of this file;
# otherwise fall back to the parent of the resolved current working directory.
if '__file__' in globals():
    back_dir = Path(__file__).parent.parent
else:
    back_dir = Path().resolve().parent
sys.path.append(str(back_dir))

In [19]:
## Load sectors
from data.telegram_raw_datas import sectors

# Rebind `sectors` to just its keys; main_algoritm iterates over this name.
sectors = sectors.keys()

In [20]:
from scripts.firebase.auth import firebase_auth

# Module-level handle used for every `db.collection(...)` call in this file.
# NOTE(review): presumably an authenticated Firestore client — confirm against
# scripts.firebase.auth.firebase_auth.
db = firebase_auth()

In [21]:
def get_sector_counts(db, sector, date):
    """Fetch the ``counts`` field stored for one sector on one date.

    Reads the document ``sector_detail/<sector>/dates/<date>`` and returns
    the value of its ``counts`` key.

    Args:
        db: Client object exposing ``collection(...).document(...)`` chaining.
        sector: Document id of the sector.
        date: Document id of the date (callers in this file pass
            ``"YYYY-MM-DD"`` strings).

    Returns:
        The ``counts`` value, or ``None`` when the document is empty or
        missing, has no ``counts`` key, or the read fails.
    """
    try:
        snapshot = (
            db.collection("sector_detail")
            .document(sector)
            .collection("dates")
            .document(date)
            .get()
        )
        data = snapshot.to_dict()
        return data.get("counts") if data else None
    except Exception as e:
        # Best-effort read: keep returning None on failure, but report the
        # error and let KeyboardInterrupt / SystemExit propagate instead of
        # swallowing them with a bare ``except:``.
        print(f"get_sector_counts failed ({sector}, {date}): {e}")
        return None

In [22]:
def get_sector_detail_detail(db, sector, date):
    """Fetch the per-channel detail document for one sector on one date.

    Reads ``sector_detail/<sector>/detail_dates/<date>`` and returns the
    result of ``to_dict()`` on it.

    Args:
        db: Client object exposing ``collection(...).document(...)`` chaining.
        sector: Document id of the sector.
        date: Document id of the date.

    Returns:
        Whatever ``to_dict()`` yields for the document, or ``None`` when the
        read fails.
    """
    try:
        return (
            db.collection("sector_detail")
            .document(sector)
            .collection("detail_dates")
            .document(date)
            .get()
            .to_dict()
        )
    except Exception as e:
        # Previously the exception object itself was returned, so callers
        # received an Exception where they expected document data. Report the
        # failure here and signal "no data" with None instead.
        print(f"get_sector_detail_detail failed ({sector}, {date}): {e}")
        return None

In [23]:
def weighted_score(data):
    """Return the view-weighted average of channel scores, rounded to 2 places.

    ``data`` maps a channel name to a dict that may hold a ``"score"`` and a
    list of ``"posts"``; each post may hold a ``'views'`` value. Channels
    whose score is missing or ``-1`` are ignored. Each remaining channel is
    weighted by the largest view count among its posts (never below 0).

    When every weight is 0 the plain mean of the valid scores is returned
    instead; with no valid channel at all the result is ``0``.
    """
    weighted_sum = 0
    view_sum = 0
    valid_scores = []

    for channel_name, channel_data in data.items():
        score = channel_data.get("score", -1)
        if score == -1:
            # Missing or sentinel score: the channel does not participate.
            continue

        # Largest view count among this channel's posts, floored at 0.
        peak_views = 0
        for post in channel_data.get("posts", []):
            try:
                raw_views = post.get('views')
                parsed_views = 0 if raw_views is None else int(raw_views)
            except Exception as e:
                print(f"views 파싱 오류 ({channel_name}): {e}")
                parsed_views = 0
            if parsed_views > peak_views:
                peak_views = parsed_views

        weighted_sum += score * peak_views
        view_sum += peak_views
        valid_scores.append(score)

    if view_sum != 0:
        return round(weighted_sum / view_sum, 2)

    # No usable view counts: fall back to an unweighted mean.
    if not valid_scores:
        return 0
    return round(sum(valid_scores) / len(valid_scores), 2)
    

In [24]:
def calculated_score(db, sector, date):
    """Compute the weighted score for one sector on one date.

    Loads the detail document via ``get_sector_detail_detail`` and passes it
    to ``weighted_score``.

    Args:
        db: Client object forwarded to ``get_sector_detail_detail``.
        sector: Document id of the sector.
        date: Document id of the date.

    Returns:
        The value returned by ``weighted_score``, or ``None`` when scoring
        fails (for example because no usable document was loaded).
    """
    data = get_sector_detail_detail(db, sector, date)
    try:
        return weighted_score(data)
    except Exception:
        # Scoring is best-effort: show the offending payload and report "no
        # score". Catching Exception (not a bare except) lets
        # KeyboardInterrupt / SystemExit stop a long backfill run.
        print(data)
        return None


In [25]:
# Spot-check: compute the score for the "IT" sector on 2025-06-01 and print it.
data = calculated_score(db, "IT", "2025-06-01")
print(data)

52.67


In [26]:
def write_db(db, data, date):
    """Write ``data`` to the ``sector_score/<date>`` document via ``set``."""
    score_doc = db.collection("sector_score").document(date)
    score_doc.set(data)

In [27]:
def make_list(dic, sector, today_data, score_data):
    """Store ``[today_data, score_data]`` under ``sector`` in ``dic``.

    ``dic`` is modified in place (an existing entry for ``sector`` is
    replaced) and the same object is returned.
    """
    dic[sector] = [today_data, score_data]
    return dic

In [28]:
def main_algoritm(db, today_date):
    """Build the per-sector payload for one date.

    For every entry in the module-level ``sectors``, looks up the counts via
    ``get_sector_counts`` and the score via ``calculated_score`` (in that
    order) and maps the sector to ``[counts, score]``. The mapping is printed
    and then returned.
    """
    per_sector = {
        sector: [
            get_sector_counts(db, sector, today_date),
            calculated_score(db, sector, today_date),
        ]
        for sector in sectors
    }
    print(per_sector)
    return per_sector


In [30]:
from datetime import datetime, timedelta

def process_date_range(start_date, end_date):
    """Build and store the sector_score payload for each day in a date range.

    Both bounds are ``"YYYY-MM-DD"`` strings and are inclusive. For every
    day, the payload from ``main_algoritm`` is written with ``write_db``
    using the module-level ``db``. Nothing happens when ``start_date`` is
    after ``end_date``.
    """
    date_format = "%Y-%m-%d"
    first_day = datetime.strptime(start_date, date_format)
    last_day = datetime.strptime(end_date, date_format)

    # Both bounds are parsed at midnight, so the gap is a whole number of days.
    day_count = (last_day - first_day).days + 1
    for offset in range(day_count):
        today_str = (first_day + timedelta(days=offset)).strftime(date_format)
        print(f"처리 중인 날짜: {today_str}")

        payload = main_algoritm(db, today_str)
        write_db(db, payload, today_str)

# Usage: run process_date_range over the inclusive range below.
# start_date = "2025-05-23"  (alternative start date, currently commented out)
start_date = "2025-08-19"
end_date = "2025-08-23"
process_date_range(start_date, end_date)





처리 중인 날짜: 2025-08-19
{'게임': [{'negative': 2, 'neutral': 12, 'positive': 4}, 51.06], '조선': [{'negative': 12, 'neutral': 10, 'positive': 5}, 44.44], '방산': [{'negative': 4, 'neutral': 10, 'positive': 8}, 58.09], '반도체': [{'negative': 6, 'neutral': 24, 'positive': 3}, 52.51], '이차전지': [{'negative': 2, 'neutral': 10, 'positive': 5}, 54.61], '디스플레이': [{'negative': 3, 'neutral': 6, 'positive': 4}, 53.02], '화장품': [{'negative': 0, 'neutral': 12, 'positive': 10}, 60.59], '자동차': [{'negative': 2, 'neutral': 17, 'positive': 15}, 56.03], '건설': [{'negative': 11, 'neutral': 20, 'positive': 2}, 48.42], '철강': [{'negative': 10, 'neutral': 20, 'positive': 1}, 38.29], '화학': [{'negative': 2, 'neutral': 25, 'positive': 5}, 54.92], '엔터': [{'negative': 2, 'neutral': 14, 'positive': 9}, 58.14], '음식료': [{'negative': 3, 'neutral': 10, 'positive': 6}, 48.94], '패션': [{'negative': 0, 'neutral': 8, 'positive': 3}, 51.45], '풍력': [{'negative': 3, 'neutral': 6, 'positive': 9}, 59.76], '원전': [{'negative': 15, 'neutral': 12